llex.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. /*
  2. ** $Id: llex.c,v 1.108 2002/07/10 20:43:53 roberto Exp roberto $
  3. ** Lexical Analyzer
  4. ** See Copyright Notice in lua.h
  5. */
  6. #include <ctype.h>
  7. #include <string.h>
  8. #include "lua.h"
  9. #include "ldo.h"
  10. #include "llex.h"
  11. #include "lobject.h"
  12. #include "lparser.h"
  13. #include "lstate.h"
  14. #include "lstring.h"
  15. #include "lzio.h"
  16. #define next(LS) (LS->current = zgetc(LS->z))
  17. /* ORDER RESERVED */
  18. static const char *const token2string [] = {
  19. "and", "break", "do", "else", "elseif",
  20. "end", "false", "for", "function", "global", "if",
  21. "in", "local", "nil", "not", "or", "repeat",
  22. "return", "then", "true", "until", "while", "*name",
  23. "..", "...", "==", ">=", "<=", "~=",
  24. "*number", "*string", "<eof>"
  25. };
  26. void luaX_init (lua_State *L) {
  27. int i;
  28. for (i=0; i<NUM_RESERVED; i++) {
  29. TString *ts = luaS_new(L, token2string[i]);
  30. luaS_fix(ts); /* reserved words are never collected */
  31. lua_assert(strlen(token2string[i])+1 <= TOKEN_LEN);
  32. ts->tsv.reserved = cast(lu_byte, i+1); /* reserved word */
  33. }
  34. }
  35. #define MAXSRC 80
  36. void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) {
  37. if (val > limit) {
  38. msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit);
  39. luaX_syntaxerror(ls, msg);
  40. }
  41. }
  42. static void luaX_error (LexState *ls, const char *s, const char *token) {
  43. lua_State *L = ls->L;
  44. char buff[MAXSRC];
  45. luaO_chunkid(buff, getstr(ls->source), MAXSRC);
  46. luaO_pushfstring(L, "%s:%d: %s near `%s'\n", buff, ls->linenumber, s, token);
  47. luaD_throw(L, LUA_ERRSYNTAX);
  48. }
  49. void luaX_syntaxerror (LexState *ls, const char *msg) {
  50. const char *lasttoken;
  51. switch (ls->t.token) {
  52. case TK_NAME:
  53. lasttoken = luaO_pushfstring(ls->L, "%s", getstr(ls->t.seminfo.ts));
  54. break;
  55. case TK_STRING:
  56. lasttoken = luaO_pushfstring(ls->L, "\"%s\"", getstr(ls->t.seminfo.ts));
  57. break;
  58. case TK_NUMBER:
  59. lasttoken = luaO_pushfstring(ls->L, "%f", ls->t.seminfo.r);
  60. break;
  61. default:
  62. lasttoken = luaX_token2str(ls, ls->t.token);
  63. break;
  64. }
  65. luaX_error(ls, msg, lasttoken);
  66. }
  67. const char *luaX_token2str (LexState *ls, int token) {
  68. if (token < FIRST_RESERVED) {
  69. lua_assert(token == (char)token);
  70. return luaO_pushfstring(ls->L, "%c", token);
  71. }
  72. else
  73. return token2string[token-FIRST_RESERVED];
  74. }
  75. static void luaX_lexerror (LexState *ls, const char *s, int token) {
  76. if (token == TK_EOS)
  77. luaX_error(ls, s, luaX_token2str(ls, token));
  78. else
  79. luaX_error(ls, s, cast(char *, G(ls->L)->Mbuffer));
  80. }
  81. static void inclinenumber (LexState *LS) {
  82. next(LS); /* skip `\n' */
  83. ++LS->linenumber;
  84. luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk");
  85. }
  86. void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) {
  87. LS->L = L;
  88. LS->lookahead.token = TK_EOS; /* no look-ahead token */
  89. LS->z = z;
  90. LS->fs = NULL;
  91. LS->linenumber = 1;
  92. LS->lastline = 1;
  93. LS->source = source;
  94. next(LS); /* read first char */
  95. if (LS->current == '#') {
  96. do { /* skip first line */
  97. next(LS);
  98. } while (LS->current != '\n' && LS->current != EOZ);
  99. }
  100. }
  101. /*
  102. ** =======================================================
  103. ** LEXICAL ANALYZER
  104. ** =======================================================
  105. */
  106. /* use Mbuffer to store names, literal strings and numbers */
  107. #define EXTRABUFF 128
  108. #define checkbuffer(L, len) \
  109. if (((len)+10)*sizeof(char) > G(L)->Mbuffsize) \
  110. luaO_openspace(L, (len)+EXTRABUFF, char)
  111. #define save(L, c, l) (cast(char *, G(L)->Mbuffer)[l++] = cast(char, c))
  112. #define save_and_next(L, LS, l) (save(L, LS->current, l), next(LS))
  113. static size_t readname (LexState *LS) {
  114. lua_State *L = LS->L;
  115. size_t l = 0;
  116. checkbuffer(L, l);
  117. do {
  118. checkbuffer(L, l);
  119. save_and_next(L, LS, l);
  120. } while (isalnum(LS->current) || LS->current == '_');
  121. save(L, '\0', l);
  122. return l-1;
  123. }
  124. /* LUA_NUMBER */
  125. static void read_numeral (LexState *LS, int comma, SemInfo *seminfo) {
  126. lua_State *L = LS->L;
  127. size_t l = 0;
  128. checkbuffer(L, l);
  129. if (comma) save(L, '.', l);
  130. while (isdigit(LS->current)) {
  131. checkbuffer(L, l);
  132. save_and_next(L, LS, l);
  133. }
  134. if (LS->current == '.') {
  135. save_and_next(L, LS, l);
  136. if (LS->current == '.') {
  137. save_and_next(L, LS, l);
  138. save(L, '\0', l);
  139. luaX_lexerror(LS,
  140. "ambiguous syntax (decimal point x string concatenation)",
  141. TK_NUMBER);
  142. }
  143. }
  144. while (isdigit(LS->current)) {
  145. checkbuffer(L, l);
  146. save_and_next(L, LS, l);
  147. }
  148. if (LS->current == 'e' || LS->current == 'E') {
  149. save_and_next(L, LS, l); /* read `E' */
  150. if (LS->current == '+' || LS->current == '-')
  151. save_and_next(L, LS, l); /* optional exponent sign */
  152. while (isdigit(LS->current)) {
  153. checkbuffer(L, l);
  154. save_and_next(L, LS, l);
  155. }
  156. }
  157. save(L, '\0', l);
  158. if (!luaO_str2d(cast(char *, G(L)->Mbuffer), &seminfo->r))
  159. luaX_lexerror(LS, "malformed number", TK_NUMBER);
  160. }
  161. static void read_long_string (LexState *LS, SemInfo *seminfo) {
  162. lua_State *L = LS->L;
  163. int cont = 0;
  164. size_t l = 0;
  165. checkbuffer(L, l);
  166. save(L, '[', l); /* save first `[' */
  167. save_and_next(L, LS, l); /* pass the second `[' */
  168. if (LS->current == '\n') /* string starts with a newline? */
  169. inclinenumber(LS); /* skip it */
  170. for (;;) {
  171. checkbuffer(L, l);
  172. switch (LS->current) {
  173. case EOZ:
  174. save(L, '\0', l);
  175. luaX_lexerror(LS, (seminfo) ? "unfinished long string" :
  176. "unfinished long comment", TK_EOS);
  177. break; /* to avoid warnings */
  178. case '[':
  179. save_and_next(L, LS, l);
  180. if (LS->current == '[') {
  181. cont++;
  182. save_and_next(L, LS, l);
  183. }
  184. continue;
  185. case ']':
  186. save_and_next(L, LS, l);
  187. if (LS->current == ']') {
  188. if (cont == 0) goto endloop;
  189. cont--;
  190. save_and_next(L, LS, l);
  191. }
  192. continue;
  193. case '\n':
  194. save(L, '\n', l);
  195. inclinenumber(LS);
  196. if (!seminfo) l = 0; /* reset buffer to avoid wasting space */
  197. continue;
  198. default:
  199. save_and_next(L, LS, l);
  200. }
  201. } endloop:
  202. save_and_next(L, LS, l); /* skip the second `]' */
  203. save(L, '\0', l);
  204. if (seminfo)
  205. seminfo->ts = luaS_newlstr(L, cast(char *, G(L)->Mbuffer)+2, l-5);
  206. }
  207. static void read_string (LexState *LS, int del, SemInfo *seminfo) {
  208. lua_State *L = LS->L;
  209. size_t l = 0;
  210. checkbuffer(L, l);
  211. save_and_next(L, LS, l);
  212. while (LS->current != del) {
  213. checkbuffer(L, l);
  214. switch (LS->current) {
  215. case EOZ:
  216. save(L, '\0', l);
  217. luaX_lexerror(LS, "unfinished string", TK_EOS);
  218. break; /* to avoid warnings */
  219. case '\n':
  220. save(L, '\0', l);
  221. luaX_lexerror(LS, "unfinished string", TK_STRING);
  222. break; /* to avoid warnings */
  223. case '\\':
  224. next(LS); /* do not save the `\' */
  225. switch (LS->current) {
  226. case 'a': save(L, '\a', l); next(LS); break;
  227. case 'b': save(L, '\b', l); next(LS); break;
  228. case 'f': save(L, '\f', l); next(LS); break;
  229. case 'n': save(L, '\n', l); next(LS); break;
  230. case 'r': save(L, '\r', l); next(LS); break;
  231. case 't': save(L, '\t', l); next(LS); break;
  232. case 'v': save(L, '\v', l); next(LS); break;
  233. case '\n': save(L, '\n', l); inclinenumber(LS); break;
  234. case EOZ: break; /* will raise an error next loop */
  235. default: {
  236. if (!isdigit(LS->current))
  237. save_and_next(L, LS, l); /* handles \\, \", \', and \? */
  238. else { /* \xxx */
  239. int c = 0;
  240. int i = 0;
  241. do {
  242. c = 10*c + (LS->current-'0');
  243. next(LS);
  244. } while (++i<3 && isdigit(LS->current));
  245. if (c > UCHAR_MAX) {
  246. save(L, '\0', l);
  247. luaX_lexerror(LS, "escape sequence too large", TK_STRING);
  248. }
  249. save(L, c, l);
  250. }
  251. }
  252. }
  253. break;
  254. default:
  255. save_and_next(L, LS, l);
  256. }
  257. }
  258. save_and_next(L, LS, l); /* skip delimiter */
  259. save(L, '\0', l);
  260. seminfo->ts = luaS_newlstr(L, cast(char *, G(L)->Mbuffer)+1, l-3);
  261. }
  262. int luaX_lex (LexState *LS, SemInfo *seminfo) {
  263. for (;;) {
  264. switch (LS->current) {
  265. case '\n': {
  266. inclinenumber(LS);
  267. continue;
  268. }
  269. case '-': {
  270. next(LS);
  271. if (LS->current != '-') return '-';
  272. /* else is a comment */
  273. next(LS);
  274. if (LS->current == '[' && (next(LS), LS->current == '['))
  275. read_long_string(LS, NULL); /* long comment */
  276. else /* short comment */
  277. while (LS->current != '\n' && LS->current != EOZ)
  278. next(LS);
  279. continue;
  280. }
  281. case '[': {
  282. next(LS);
  283. if (LS->current != '[') return '[';
  284. else {
  285. read_long_string(LS, seminfo);
  286. return TK_STRING;
  287. }
  288. }
  289. case '=': {
  290. next(LS);
  291. if (LS->current != '=') return '=';
  292. else { next(LS); return TK_EQ; }
  293. }
  294. case '<': {
  295. next(LS);
  296. if (LS->current != '=') return '<';
  297. else { next(LS); return TK_LE; }
  298. }
  299. case '>': {
  300. next(LS);
  301. if (LS->current != '=') return '>';
  302. else { next(LS); return TK_GE; }
  303. }
  304. case '~': {
  305. next(LS);
  306. if (LS->current != '=') return '~';
  307. else { next(LS); return TK_NE; }
  308. }
  309. case '"':
  310. case '\'': {
  311. read_string(LS, LS->current, seminfo);
  312. return TK_STRING;
  313. }
  314. case '.': {
  315. next(LS);
  316. if (LS->current == '.') {
  317. next(LS);
  318. if (LS->current == '.') {
  319. next(LS);
  320. return TK_DOTS; /* ... */
  321. }
  322. else return TK_CONCAT; /* .. */
  323. }
  324. else if (!isdigit(LS->current)) return '.';
  325. else {
  326. read_numeral(LS, 1, seminfo);
  327. return TK_NUMBER;
  328. }
  329. }
  330. case EOZ: {
  331. return TK_EOS;
  332. }
  333. default: {
  334. if (isspace(LS->current)) {
  335. next(LS);
  336. continue;
  337. }
  338. else if (isdigit(LS->current)) {
  339. read_numeral(LS, 0, seminfo);
  340. return TK_NUMBER;
  341. }
  342. else if (isalpha(LS->current) || LS->current == '_') {
  343. /* identifier or reserved word */
  344. size_t l = readname(LS);
  345. TString *ts = luaS_newlstr(LS->L, cast(char *, G(LS->L)->Mbuffer), l);
  346. if (ts->tsv.reserved > 0) /* reserved word? */
  347. return ts->tsv.reserved - 1 + FIRST_RESERVED;
  348. seminfo->ts = ts;
  349. return TK_NAME;
  350. }
  351. else {
  352. int c = LS->current;
  353. if (iscntrl(c))
  354. luaX_error(LS, "invalid control char",
  355. luaO_pushfstring(LS->L, "char(%d)", c));
  356. next(LS);
  357. return c; /* single-char tokens (+ - / ...) */
  358. }
  359. }
  360. }
  361. }
  362. }
  363. #undef next