toker.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. #include "std.h"
  2. #include "toker.h"
  // Case-insensitive "less than" ordering for NUL-terminated C strings.
  // Used as the comparator of the keyword map, so keyword lookup ignores case.
  struct Stricmp{
    // Returns true when x sorts before y, comparing characters after tolower().
    bool operator()( const char *x,const char *y )const{
      // The <ctype.h> functions require a value representable as unsigned char
      // (or EOF); passing a negative plain char is undefined behaviour, so each
      // byte is converted through unsigned char first.
      int cx=tolower( static_cast<unsigned char>(*x) );
      int cy=tolower( static_cast<unsigned char>(*y) );
      // Walk both strings while they agree and x has not reached its terminator.
      while( cx==cy && cx ){
        cx=tolower( static_cast<unsigned char>(*++x) );
        cy=tolower( static_cast<unsigned char>(*++y) );
      }
      return cx<cy;
    }
  };
  9. typedef map<const char*,int,Stricmp> TokeMap;
  10. static TokeMap _tokes;
  11. static void initTokes(){
  12. if( _tokes.size() ) return;
  13. _tokes["Strict"]=T_STRICT;
  14. _tokes["SuperStrict"]=T_SUPERSTRICT;
  15. _tokes["Module"]=T_MODULE;
  16. _tokes["Framework"]=T_FRAMEWORK;
  17. _tokes["Import"]=T_IMPORT;
  18. _tokes["ModuleInfo"]=T_MODULEINFO;
  19. _tokes["DefData"]=T_DEFDATA;
  20. _tokes["ReadData"]=T_READDATA;
  21. _tokes["RestoreData"]=T_RESTOREDATA;
  22. _tokes["Rem"]=T_REM;
  23. _tokes["EndRem"]=T_ENDREM;
  24. _tokes["Try"]=T_TRY;
  25. _tokes["Catch"]=T_CATCH;
  26. _tokes["EndTry"]=T_ENDTRY;
  27. _tokes["Throw"]=T_THROW;
  28. _tokes["Goto"]=T_GOTO;
  29. _tokes["True"]=T_TRUE;
  30. _tokes["False"]=T_FALSE;
  31. _tokes["Pi"]=T_PI;
  32. _tokes["Byte"]=T_BYTE;
  33. _tokes["Short"]=T_SHORT;
  34. _tokes["Int"]=T_INT;
  35. _tokes["Long"]=T_LONG;
  36. _tokes["Float"]=T_FLOAT;
  37. _tokes["Double"]=T_DOUBLE;
  38. _tokes["Object"]=T_OBJECT;
  39. _tokes["String"]=T_STRING;
  40. _tokes["Var"]=T_VAR;
  41. _tokes["Ptr"]=T_PTR;
  42. _tokes["VarPtr"]=T_VARPTR;
  43. _tokes["Chr"]=T_CHR;
  44. _tokes["Len"]=T_LEN;
  45. _tokes["Asc"]=T_ASC;
  46. _tokes["SizeOf"]=T_SIZEOF;
  47. _tokes["Sgn"]=T_SGN;
  48. _tokes["Abs"]=T_ABS;
  49. _tokes["Min"]=T_MIN;
  50. _tokes["Max"]=T_MAX;
  51. _tokes["Mod"]=T_MOD;
  52. _tokes["Shl"]=T_SHL;
  53. _tokes["Shr"]=T_SHR;
  54. _tokes["Sar"]=T_SAR;
  55. _tokes["Not"]=T_NOT;
  56. _tokes["And"]=T_AND;
  57. _tokes["Or"]=T_OR;
  58. _tokes["Return"]=T_RETURN;
  59. _tokes["Local"]=T_LOCAL;
  60. _tokes["Global"]=T_GLOBAL;
  61. _tokes["Const"]=T_CONST;
  62. _tokes["Field"]=T_FIELD;
  63. _tokes["Alias"]=T_ALIAS;
  64. _tokes["End"]=T_END;
  65. _tokes["Type"]=T_TYPE;
  66. _tokes["EndType"]=T_ENDTYPE;
  67. _tokes["Extends"]=T_EXTENDS;
  68. _tokes["Method"]=T_METHOD;
  69. _tokes["EndMethod"]=T_ENDMETHOD;
  70. _tokes["Abstract"]=T_ABSTRACT;
  71. _tokes["Final"]=T_FINAL;
  72. _tokes["Function"]=T_FUNCTION;
  73. _tokes["EndFunction"]=T_ENDFUNCTION;
  74. _tokes["New"]=T_NEW;
  75. _tokes["Release"]=T_RELEASE;
  76. _tokes["Delete"]=T_DELETE;
  77. _tokes["Null"]=T_NULL;
  78. _tokes["Self"]=T_SELF;
  79. _tokes["Super"]=T_SUPER;
  80. _tokes["Incbin"]=T_INCBIN;
  81. _tokes["IncbinPtr"]=T_INCBINPTR;
  82. _tokes["IncbinLen"]=T_INCBINLEN;
  83. _tokes["Include"]=T_INCLUDE;
  84. _tokes["Extern"]=T_EXTERN;
  85. _tokes["EndExtern"]=T_ENDEXTERN;
  86. _tokes["Public"]=T_PUBLIC;
  87. _tokes["Private"]=T_PRIVATE;
  88. _tokes["If"]=T_IF;
  89. _tokes["Then"]=T_THEN;
  90. _tokes["Else"]=T_ELSE;
  91. _tokes["ElseIf"]=T_ELSEIF;
  92. _tokes["EndIf"]=T_ENDIF;
  93. _tokes["For"]=T_FOR;
  94. _tokes["To"]=T_TO;
  95. _tokes["Step"]=T_STEP;
  96. _tokes["Next"]=T_NEXT;
  97. _tokes["EachIn"]=T_EACHIN;
  98. _tokes["While"]=T_WHILE;
  99. _tokes["EndWhile"]=T_WEND;
  100. _tokes["Wend"]=T_WEND;
  101. _tokes["Repeat"]=T_REPEAT;
  102. _tokes["Until"]=T_UNTIL;
  103. _tokes["Forever"]=T_FOREVER;
  104. _tokes["Select"]=T_SELECT;
  105. _tokes["Case"]=T_CASE;
  106. _tokes["Default"]=T__DEFAULT;
  107. _tokes["EndSelect"]=T_ENDSELECT;
  108. _tokes["Continue"]=T_CONTINUE;
  109. _tokes["Exit"]=T_EXIT;
  110. _tokes["Assert"]=T_ASSERT;
  111. _tokes["NoDebug"]=T_NODEBUG;
  112. }
  113. static Toke nextToke( const vector<char> &line,int &p ){
  114. int b=p;
  115. int c=line[p++];
  116. int cur=c;
  117. if( isalpha(c) || c=='_' ){
  118. while( isalnum( c=line[p] ) || c=='_' ) ++p;
  119. cur=T_IDENT;
  120. string t( &line[b],p-b );
  121. TokeMap::iterator it=_tokes.find(t.c_str());
  122. if( it!=_tokes.end() ){
  123. cur=it->second;
  124. if( cur==T_END && line[p]==' ' && isalpha(line[p+1]) ){
  125. int st=p+1,en=p+2;
  126. while( isalpha(line[en]) ) ++en;
  127. string t="end"+string( &line[st],en-st );
  128. it=_tokes.find(t.c_str());
  129. if( it!=_tokes.end() ){
  130. cur=it->second;
  131. p=en;
  132. }
  133. }
  134. }
  135. }else if( isdigit(c) || (c=='.' && isdigit(line[p])) ){
  136. cur=T_INTCONST;
  137. if( c=='.' ){
  138. ++p;cur=T_FLOATCONST;
  139. }
  140. while( isdigit(line[p]) ) ++p;
  141. if( cur==T_INTCONST && line[p]=='.' && isdigit(line[p+1]) ){
  142. p+=2;cur=T_FLOATCONST;
  143. while( isdigit(line[p]) ) ++p;
  144. }
  145. if( tolower(line[p])=='e' && (line[p+1]=='+'||line[p+1]=='-'||isdigit(line[p+1])) ){
  146. ++p;cur=T_FLOATCONST;
  147. if( !isdigit(line[p]) ) ++p;
  148. while( isdigit(line[p]) ) ++p;
  149. }
  150. }else if( c=='$' && isxdigit(line[p]) ){
  151. ++p;cur=T_INTCONST;
  152. while( isxdigit(line[p]) ) ++p;
  153. }else if( c=='%' && (line[p]=='0'||line[p]=='1') ){
  154. ++p;cur=T_INTCONST;
  155. while( line[p]=='0' || line[p]=='1' ) ++p;
  156. }else if( c=='$' && tolower(line[p])=='z' ){
  157. ++p;cur=T_CSTRING;
  158. }else if( c=='$' && tolower(line[p])=='w' ){
  159. ++p;cur=T_WSTRING;
  160. }else if( c=='\"' ){ //string const
  161. while( line[p]!='\"' && line[p]!='\n' ) ++p;
  162. if( line[p]=='\"' ){
  163. cur=T_STRINGCONST;
  164. ++p;
  165. }else{
  166. cur=T_BADSTRINGCONST;
  167. }
  168. /*
  169. cur=T_STRINGCONST;
  170. while( line[p]!='\"' && line[p]!='\n' ) ++p;
  171. if( line[p++]!='\"' ) cur=T_BADSTRINGCONST;
  172. */
  173. }else if( c=='<' ){ //comparison
  174. switch( line[p++] ){
  175. case '=':cur=T_LE;break;
  176. case '>':cur=T_NE;break;
  177. default:cur=T_LT;--p;
  178. }
  179. }else if( c=='=' ){ //comparison
  180. switch( line[p++] ){
  181. case '>':cur=T_GE;break;
  182. case '<':cur=T_LE;break;
  183. default:cur=T_EQ;--p;
  184. }
  185. }else if( c=='>' ){ //comparison
  186. switch( line[p++] ){
  187. case '=':cur=T_GE;break;
  188. case '<':cur=T_NE;break;
  189. default:cur=T_GT;--p;
  190. }
  191. }else if( c==':' ){
  192. Toke t=nextToke( line,p );
  193. switch( t.toke ){
  194. case '+':cur=T_ADDASSIGN;break;
  195. case '-':cur=T_SUBASSIGN;break;
  196. case '*':cur=T_MULASSIGN;break;
  197. case '/':cur=T_DIVASSIGN;break;
  198. case '|':cur=T_ORASSIGN;break;
  199. case '&':cur=T_ANDASSIGN;break;
  200. case '~':cur=T_XORASSIGN;break;
  201. case T_MOD:cur=T_MODASSIGN;break;
  202. case T_SHL:cur=T_SHLASSIGN;break;
  203. case T_SHR:cur=T_SHRASSIGN;break;
  204. case T_SAR:cur=T_SARASSIGN;break;
  205. default:p=b+1;
  206. }
  207. }else if( c=='.' && line[p]=='.' ){
  208. ++p;cur=T_DOTDOT;
  209. }else if( c=='[' ){ //allow spaces in [,] type tokes
  210. while( line[p]==' ' || line[p]==',' ) ++p;
  211. if( line[p]==']' ){
  212. ++p;cur=T_ARRAYDECL;
  213. }else{
  214. p=b+1;
  215. }
  216. }
  217. return Toke( cur,b,p );
  218. }
  219. Toker::Toker( string f ):fh(0),toke_index(0),line_num(0),file_name(f){
  220. initTokes();
  221. fh=fopen( file_name.c_str(),"rb" );
  222. if( !fh ) fail( "Unable to open file '%s'",file_name.c_str() );
  223. encoding=UNK;
  224. next();
  225. }
  226. void Toker::close(){
  227. if( fh ){
  228. fclose( fh );
  229. fh=0;
  230. }
  231. }
  232. string Toker::sourceFile(){
  233. return file_name;
  234. }
  235. string Toker::sourceInfo(){
  236. return file_name+";"+fromint(line_num)+";"+fromint(curr_toke.begin+1);
  237. }
  238. int Toker::curr(){
  239. return curr_toke.toke;
  240. }
  241. string Toker::text(){
  242. return string( &line[curr_toke.begin],curr_toke.end-curr_toke.begin );
  243. }
  244. bstring Toker::wtext(){
  245. return bstring( &wline[curr_toke.begin],curr_toke.end-curr_toke.begin );
  246. }
  247. int Toker::peek( int n ){
  248. assert( toke_index+n<tokes.size() );
  249. return tokes[toke_index+n].toke;
  250. }
  251. int Toker::tgetc(){
  252. int c=fgetc(fh),d,e;
  253. if( c==EOF ) return c;
  254. switch( encoding ){
  255. case UNK:
  256. d=fgetc(fh);
  257. if( c==0xfe && d==0xff ){
  258. encoding=UTF16BE;
  259. }else if( c==0xff && d==0xfe ){
  260. encoding=UTF16LE;
  261. }else if( c==0xef && d==0xbb ){
  262. e=fgetc(fh);
  263. if( e==0xbf ){
  264. encoding=UTF8;
  265. }else{
  266. ungetc( e,fh );
  267. }
  268. }
  269. if( encoding==UNK ){
  270. encoding=LATIN1;
  271. ungetc( d,fh );
  272. ungetc( c,fh );
  273. }
  274. return tgetc();
  275. case LATIN1:
  276. return c;
  277. case UTF8:
  278. if( c<128 ){
  279. return c;
  280. }
  281. d=fgetc(fh);
  282. if( c<224 ){
  283. return (c-192)*64+(d-128);
  284. }
  285. e=fgetc(fh);
  286. if( c<240 ){
  287. return (c-224)*4096+(d-128)*64+(e-128);
  288. }
  289. return 0;
  290. case UTF16BE:
  291. return ((c&0xff)<<8)|(fgetc(fh)&0xff);
  292. case UTF16LE:
  293. return ((fgetc(fh)&0xff)<<8)|(c&0xff);
  294. }
  295. cout<<"Here!"<<endl;
  296. return ' ';
  297. }
  298. void Toker::nextLine(){
  299. ++line_num;
  300. line.clear();
  301. wline.clear();
  302. tokes.clear();
  303. if( !fh ){
  304. tokes.push_back( Toke(EOF,0,0) );
  305. return;
  306. }
  307. for(;;){
  308. int c=tgetc();
  309. if( c=='\n' || c==EOF ){
  310. if( c==EOF ) close();
  311. line.push_back( '\n' );
  312. wline.push_back( '\n' );
  313. break;
  314. }
  315. line.push_back( (c>32 && c<127) ? c : ' ' );
  316. wline.push_back(c);
  317. }
  318. int p=0;
  319. for(;;){
  320. int c=line[p];
  321. if( c=='\'' || c=='\n' ){
  322. if( tokes.size() && tokes.back().toke==T_DOTDOT ){
  323. tokes.pop_back();
  324. break;
  325. }
  326. tokes.push_back( Toke('\n',p,line.size()) );
  327. break;
  328. }else if( isgraph(c) ){
  329. tokes.push_back( nextToke(line,p) );
  330. }else{
  331. ++p;
  332. }
  333. }
  334. }
  335. int Toker::next(){
  336. if( curr()==EOF ) return EOF;
  337. while( toke_index==tokes.size() ){
  338. nextLine();
  339. toke_index=0;
  340. for(;;){
  341. if( !tokes.size() ){
  342. nextLine();
  343. }else if( tokes[0].toke=='?' ){
  344. ++toke_index;
  345. bool cc=true,cNot=false;
  346. if( toke_index<tokes.size() && tokes[toke_index].toke==T_NOT ){
  347. ++toke_index;
  348. cNot=true;
  349. }
  350. if( toke_index<tokes.size() && tokes[toke_index].toke==T_IDENT ){
  351. string id=string( &line[tokes[toke_index].begin],tokes[toke_index].end-tokes[toke_index].begin );
  352. ++toke_index;
  353. cc=env_config.count( tolower(id) );
  354. }
  355. if( cNot ) cc=!cc;
  356. if( cc ) break;
  357. do{
  358. nextLine();
  359. }while( tokes[0].toke!=EOF && tokes[0].toke!='?' );
  360. toke_index=0;
  361. }else if( tokes[0].toke==T_REM ){
  362. do{
  363. nextLine();
  364. }while( tokes[0].toke!=EOF && tokes[0].toke!=T_ENDREM );
  365. if( tokes[0].toke==EOF ) break;
  366. nextLine();
  367. }else{
  368. break;
  369. }
  370. }
  371. }
  372. curr_toke=tokes[toke_index++];
  373. return curr();
  374. }
  375. string Toker::toString( int n ){
  376. switch( n ){
  377. case '\n':return "end-of-line";
  378. case EOF:return "end-of-file";
  379. case T_LT:return "'<'";
  380. case T_GT:return "'>'";
  381. case T_LE:return "'<='";
  382. case T_GE:return "'>='";
  383. case T_EQ:return "'='";
  384. case T_NE:return "'<>'";
  385. case T_DOTDOT:return "'..'";
  386. case T_IDENT:return "identifier";
  387. case T_INTCONST:return "integer literal";
  388. case T_FLOATCONST:return "floating point literal";
  389. case T_STRINGCONST:return "string literal";
  390. case T_CSTRING:return "cstring tag";
  391. case T_WSTRING:return "wstring tag";
  392. case T_ARRAYDECL:return "array declaration";
  393. case T_BADSTRINGCONST:return "malformed string literal";
  394. case T_ADDASSIGN:return "add assign";
  395. case T_SUBASSIGN:return "subtract assign";
  396. case T_MULASSIGN:return "multiply assign";
  397. case T_DIVASSIGN:return "divide assign";
  398. case T_MODASSIGN:return "remainder assign";
  399. case T_ORASSIGN:return "or assign";
  400. case T_ANDASSIGN:return "and assign";
  401. case T_XORASSIGN:return "exclusive or assign";
  402. case T_SHLASSIGN:return "shift left assign";
  403. case T_SHRASSIGN:return "shift right assign";
  404. case T_SARASSIGN:return "Shift arithmetic right assign";
  405. }
  406. TokeMap::iterator it;
  407. for( it=_tokes.begin();it!=_tokes.end();++it ){
  408. if( n==it->second ) return it->first;
  409. }
  410. if( isgraph(n) ){
  411. char c=n;
  412. return "'"+string(&c,1)+"'";
  413. }
  414. char buf[8];
  415. sprintf( buf,"%i",n );
  416. return "<chr:"+string(buf)+">";
  417. }