lex.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. char *rcs_lex = "$Id: lex.c,v 2.46 1997/04/07 14:48:53 roberto Exp roberto $";
  #include <ctype.h>
  #include <limits.h>
  #include <string.h>
  #include "auxlib.h"
  #include "luamem.h"
  #include "tree.h"
  #include "table.h"
  #include "lex.h"
  #include "inout.h"
  #include "luadebug.h"
  #include "parser.h"
  12. #define MINBUFF 250
  13. #define next() (current = input())
  14. #define save(x) (yytext[tokensize++] = (x))
  15. #define save_and_next() (save(current), next())
  16. static int current; /* look ahead character */
  17. static Input input; /* input function */
  18. #define MAX_IFS 10
  19. /* "ifstate" keeps the state of each nested $if the lexical is
  20. ** dealing with. The first bit indicates whether the $if condition
  21. ** is false or true. The second bit indicates whether the lexical is
  22. ** inside the "then" part (0) or the "else" part (2)
  23. */
  24. static int ifstate[MAX_IFS]; /* 0 => then part - condition false */
  25. /* 1 => then part - condition true */
  26. /* 2 => else part - condition false */
  27. /* 3 => else part - condition true */
  28. static int iflevel; /* level of nested $if's */
  29. void lua_setinput (Input fn)
  30. {
  31. current = '\n';
  32. lua_linenumber = 0;
  33. iflevel = 0;
  34. input = fn;
  35. }
  36. static void luaI_auxsyntaxerror (char *s)
  37. {
  38. luaL_verror("%s;\n> at line %d in file %s",
  39. s, lua_linenumber, lua_parsedfile);
  40. }
  41. static void luaI_auxsynterrbf (char *s, char *token)
  42. {
  43. if (token[0] == 0)
  44. token = "<eof>";
  45. luaL_verror("%s;\n> last token read: \"%s\" at line %d in file %s",
  46. s, token, lua_linenumber, lua_parsedfile);
  47. }
  /*
  ** Reports the syntax error "s", quoting as the last token whatever text
  ** luaI_buffer(1) returns (presumably the buffer the scanner writes its
  ** tokens into -- confirm against auxlib).
  */
  void luaI_syntaxerror (char *s)
  {
    char *lasttoken = luaI_buffer(1);
    luaI_auxsynterrbf(s, lasttoken);
  }
  52. static struct
  53. {
  54. char *name;
  55. int token;
  56. } reserved [] = {
  57. {"and", AND},
  58. {"do", DO},
  59. {"else", ELSE},
  60. {"elseif", ELSEIF},
  61. {"end", END},
  62. {"function", FUNCTION},
  63. {"if", IF},
  64. {"local", LOCAL},
  65. {"nil", NIL},
  66. {"not", NOT},
  67. {"or", OR},
  68. {"repeat", REPEAT},
  69. {"return", RETURN},
  70. {"then", THEN},
  71. {"until", UNTIL},
  72. {"while", WHILE} };
  73. #define RESERVEDSIZE (sizeof(reserved)/sizeof(reserved[0]))
  74. void luaI_addReserved (void)
  75. {
  76. int i;
  77. for (i=0; i<RESERVEDSIZE; i++)
  78. {
  79. TaggedString *ts = lua_createstring(reserved[i].name);
  80. ts->marked = reserved[i].token; /* reserved word (always > 255) */
  81. }
  82. }
  83. /*
  84. ** Pragma handling
  85. */
  86. #define PRAGMASIZE 20
  87. static void skipspace (void)
  88. {
  89. while (current == ' ' || current == '\t') next();
  90. }
  /*
  ** Evaluates the condition of a $if/$ifnot pragma.  "nil" is false, "1"
  ** is true, and a word starting with a letter takes the result of
  ** luaI_globaldefined.  Anything else is reported as an error.
  */
  static int checkcond (char *buff)
  {
    if (strcmp(buff, "nil") == 0)
      return 0;
    if (strcmp(buff, "1") == 0)
      return 1;
    if (isalpha((unsigned char)buff[0]))
      return luaI_globaldefined(buff);
    luaI_auxsynterrbf("invalid $if condition", buff);
    return 0;  /* to avoid warnings */
  }
  104. static void readname (char *buff)
  105. {
  106. int i = 0;
  107. skipspace();
  108. while (isalnum((unsigned char)current)) {
  109. if (i >= PRAGMASIZE) {
  110. buff[PRAGMASIZE] = 0;
  111. luaI_auxsynterrbf("pragma too long", buff);
  112. }
  113. buff[i++] = current;
  114. next();
  115. }
  116. buff[i] = 0;
  117. }
  118. static void inclinenumber (void);
  119. static void ifskip (int thisiflevel)
  120. {
  121. while (iflevel > thisiflevel &&
  122. (ifstate[thisiflevel] == 0 || ifstate[thisiflevel] == 3)) {
  123. if (current == '\n')
  124. inclinenumber();
  125. else if (current == 0)
  126. luaI_auxsyntaxerror("input ends inside a $if");
  127. else next();
  128. }
  129. }
  130. static void inclinenumber (void)
  131. {
  132. static char *pragmas [] =
  133. {"debug", "nodebug", "end", "ifnot", "if", "else", NULL};
  134. next(); /* skip '\n' */
  135. ++lua_linenumber;
  136. if (current == '$') { /* is a pragma? */
  137. char buff[PRAGMASIZE+1];
  138. int ifnot = 0;
  139. next(); /* skip $ */
  140. readname(buff);
  141. switch (luaI_findstring(buff, pragmas)) {
  142. case 0: /* debug */
  143. lua_debug = 1;
  144. break;
  145. case 1: /* nodebug */
  146. lua_debug = 0;
  147. break;
  148. case 2: /* end */
  149. if (--iflevel < 0)
  150. luaI_auxsyntaxerror("unmatched $endif");
  151. break;
  152. case 3: /* ifnot */
  153. ifnot = 1;
  154. /* go through */
  155. case 4: /* if */
  156. if (iflevel == MAX_IFS)
  157. luaI_auxsyntaxerror("too many nested `$ifs'");
  158. readname(buff);
  159. ifstate[iflevel++] = checkcond(buff) ? !ifnot : ifnot;
  160. break;
  161. case 5: /* else */
  162. if (iflevel <= 0 || (ifstate[iflevel-1] & 2))
  163. luaI_auxsyntaxerror("unmatched $else");
  164. ifstate[iflevel-1] = ifstate[iflevel-1] | 2;
  165. break;
  166. default:
  167. luaI_auxsynterrbf("invalid pragma", buff);
  168. }
  169. skipspace();
  170. if (current == '\n') /* pragma must end with a '\n' */
  171. inclinenumber();
  172. else if (current != 0) /* or eof */
  173. luaI_auxsyntaxerror("invalid pragma format");
  174. if (iflevel > 0)
  175. ifskip(iflevel-1);
  176. }
  177. }
  178. static int read_long_string (char *yytext, int buffsize)
  179. {
  180. int cont = 0;
  181. int tokensize = 2; /* '[[' already stored */
  182. while (1)
  183. {
  184. if (buffsize-tokensize <= 2) /* may read more than 1 char in one cicle */
  185. yytext = luaI_buffer(buffsize *= 2);
  186. switch (current)
  187. {
  188. case 0:
  189. save(0);
  190. return WRONGTOKEN;
  191. case '[':
  192. save_and_next();
  193. if (current == '[')
  194. {
  195. cont++;
  196. save_and_next();
  197. }
  198. continue;
  199. case ']':
  200. save_and_next();
  201. if (current == ']')
  202. {
  203. if (cont == 0) goto endloop;
  204. cont--;
  205. save_and_next();
  206. }
  207. continue;
  208. case '\n':
  209. save('\n');
  210. inclinenumber();
  211. continue;
  212. default:
  213. save_and_next();
  214. }
  215. } endloop:
  216. save_and_next(); /* pass the second ']' */
  217. yytext[tokensize-2] = 0; /* erases ']]' */
  218. luaY_lval.vWord = luaI_findconstantbyname(yytext+2);
  219. yytext[tokensize-2] = ']'; /* restores ']]' */
  220. save(0);
  221. return STRING;
  222. }
  223. int luaY_lex (void)
  224. {
  225. static int linelasttoken = 0;
  226. double a;
  227. int buffsize = MINBUFF;
  228. char *yytext = luaI_buffer(buffsize);
  229. yytext[1] = yytext[2] = yytext[3] = 0;
  230. if (lua_debug)
  231. luaI_codedebugline(linelasttoken);
  232. linelasttoken = lua_linenumber;
  233. while (1)
  234. {
  235. int tokensize = 0;
  236. switch (current)
  237. {
  238. case '\n':
  239. inclinenumber();
  240. linelasttoken = lua_linenumber;
  241. continue;
  242. case ' ': case '\t': case '\r': /* CR: to avoid problems with DOS */
  243. next();
  244. continue;
  245. case '-':
  246. save_and_next();
  247. if (current != '-') return '-';
  248. do { next(); } while (current != '\n' && current != 0);
  249. continue;
  250. case '[':
  251. save_and_next();
  252. if (current != '[') return '[';
  253. else
  254. {
  255. save_and_next(); /* pass the second '[' */
  256. return read_long_string(yytext, buffsize);
  257. }
  258. case '=':
  259. save_and_next();
  260. if (current != '=') return '=';
  261. else { save_and_next(); return EQ; }
  262. case '<':
  263. save_and_next();
  264. if (current != '=') return '<';
  265. else { save_and_next(); return LE; }
  266. case '>':
  267. save_and_next();
  268. if (current != '=') return '>';
  269. else { save_and_next(); return GE; }
  270. case '~':
  271. save_and_next();
  272. if (current != '=') return '~';
  273. else { save_and_next(); return NE; }
  274. case '"':
  275. case '\'':
  276. {
  277. int del = current;
  278. save_and_next();
  279. while (current != del)
  280. {
  281. if (buffsize-tokensize <= 2) /* may read more than 1 char in one cicle */
  282. yytext = luaI_buffer(buffsize *= 2);
  283. switch (current)
  284. {
  285. case 0:
  286. case '\n':
  287. save(0);
  288. return WRONGTOKEN;
  289. case '\\':
  290. next(); /* do not save the '\' */
  291. switch (current)
  292. {
  293. case 'n': save('\n'); next(); break;
  294. case 't': save('\t'); next(); break;
  295. case 'r': save('\r'); next(); break;
  296. case '\n': save('\n'); inclinenumber(); break;
  297. default : save_and_next(); break;
  298. }
  299. break;
  300. default:
  301. save_and_next();
  302. }
  303. }
  304. next(); /* skip delimiter */
  305. save(0);
  306. luaY_lval.vWord = luaI_findconstantbyname(yytext+1);
  307. tokensize--;
  308. save(del); save(0); /* restore delimiter */
  309. return STRING;
  310. }
  311. case 'a': case 'b': case 'c': case 'd': case 'e':
  312. case 'f': case 'g': case 'h': case 'i': case 'j':
  313. case 'k': case 'l': case 'm': case 'n': case 'o':
  314. case 'p': case 'q': case 'r': case 's': case 't':
  315. case 'u': case 'v': case 'w': case 'x': case 'y':
  316. case 'z':
  317. case 'A': case 'B': case 'C': case 'D': case 'E':
  318. case 'F': case 'G': case 'H': case 'I': case 'J':
  319. case 'K': case 'L': case 'M': case 'N': case 'O':
  320. case 'P': case 'Q': case 'R': case 'S': case 'T':
  321. case 'U': case 'V': case 'W': case 'X': case 'Y':
  322. case 'Z':
  323. case '_':
  324. {
  325. TaggedString *ts;
  326. do {
  327. save_and_next();
  328. } while (isalnum((unsigned char)current) || current == '_');
  329. save(0);
  330. ts = lua_createstring(yytext);
  331. if (ts->marked > 2)
  332. return ts->marked; /* reserved word */
  333. luaY_lval.pTStr = ts;
  334. ts->marked = 2; /* avoid GC */
  335. return NAME;
  336. }
  337. case '.':
  338. save_and_next();
  339. if (current == '.')
  340. {
  341. save_and_next();
  342. if (current == '.')
  343. {
  344. save_and_next();
  345. return DOTS; /* ... */
  346. }
  347. else return CONC; /* .. */
  348. }
  349. else if (!isdigit((unsigned char)current)) return '.';
  350. /* current is a digit: goes through to number */
  351. a=0.0;
  352. goto fraction;
  353. case '0': case '1': case '2': case '3': case '4':
  354. case '5': case '6': case '7': case '8': case '9':
  355. a=0.0;
  356. do {
  357. a=10.0*a+(current-'0');
  358. save_and_next();
  359. } while (isdigit((unsigned char)current));
  360. if (current == '.') {
  361. save_and_next();
  362. if (current == '.')
  363. luaI_syntaxerror(
  364. "ambiguous syntax (decimal point x string concatenation)");
  365. }
  366. fraction:
  367. { double da=0.1;
  368. while (isdigit((unsigned char)current))
  369. {
  370. a+=(current-'0')*da;
  371. da/=10.0;
  372. save_and_next();
  373. }
  374. if (current == 'e' || current == 'E')
  375. {
  376. int e=0;
  377. int neg;
  378. double ea;
  379. save_and_next();
  380. neg=(current=='-');
  381. if (current == '+' || current == '-') save_and_next();
  382. if (!isdigit((unsigned char)current)) {
  383. save(0); return WRONGTOKEN; }
  384. do {
  385. e=10.0*e+(current-'0');
  386. save_and_next();
  387. } while (isdigit((unsigned char)current));
  388. for (ea=neg?0.1:10.0; e>0; e>>=1)
  389. {
  390. if (e & 1) a*=ea;
  391. ea*=ea;
  392. }
  393. }
  394. luaY_lval.vFloat = a;
  395. save(0);
  396. return NUMBER;
  397. }
  398. case 0:
  399. save(0);
  400. if (iflevel > 0)
  401. luaI_syntaxerror("missing $endif");
  402. return 0;
  403. default:
  404. save_and_next();
  405. return yytext[0];
  406. }
  407. }
  408. }