llex.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /*
  2. ** $Id: llex.c,v 1.118 2003/02/28 17:19:47 roberto Exp roberto $
  3. ** Lexical Analyzer
  4. ** See Copyright Notice in lua.h
  5. */
  6. #include <ctype.h>
  7. #include <string.h>
  8. #define llex_c
  9. #include "lua.h"
  10. #include "ldo.h"
  11. #include "llex.h"
  12. #include "lobject.h"
  13. #include "lparser.h"
  14. #include "lstate.h"
  15. #include "lstring.h"
  16. #include "lzio.h"
  /*
  ** Advance the lexer by one character: read the next character from the
  ** input stream LS->z with zgetc and store it in LS->current. The value of
  ** the expression is the character just stored. The argument is
  ** parenthesized so that any pointer expression (not only a plain
  ** identifier) can be passed safely.
  */
  #define next(LS) ((LS)->current = zgetc((LS)->z))
  /* ORDER RESERVED */
  /*
  ** Printable spelling of each token, indexed by (token - FIRST_RESERVED).
  ** The leading entries are the reserved words that luaX_init interns;
  ** they are followed by the name token, the multi-character operators,
  ** and the number/string/end-of-stream tokens.
  */
  static const char *const token2string [] = {
    /* reserved words */
    "and", "break", "do",
    "else", "elseif", "end",
    "false", "for", "function",
    "if", "in", "local",
    "nil", "not", "or",
    "repeat", "return", "then",
    "true", "until", "while",
    /* identifiers */
    "*name",
    /* multi-character operators */
    "..", "...",
    "==", ">=", "<=", "~=",
    /* literals and end of stream */
    "*number", "*string", "<eof>"
  };
  27. void luaX_init (lua_State *L) {
  28. int i;
  29. for (i=0; i<NUM_RESERVED; i++) {
  30. TString *ts = luaS_new(L, token2string[i]);
  31. luaS_fix(ts); /* reserved words are never collected */
  32. lua_assert(strlen(token2string[i])+1 <= TOKEN_LEN);
  33. ts->tsv.reserved = cast(lu_byte, i+1); /* reserved word */
  34. }
  35. }
  36. #define MAXSRC 80
  37. void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) {
  38. if (val > limit) {
  39. msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit);
  40. luaX_syntaxerror(ls, msg);
  41. }
  42. }
  43. void luaX_errorline (LexState *ls, const char *s, const char *token, int line) {
  44. lua_State *L = ls->L;
  45. char buff[MAXSRC];
  46. luaO_chunkid(buff, getstr(ls->source), MAXSRC);
  47. luaO_pushfstring(L, "%s:%d: %s near `%s'", buff, line, s, token);
  48. luaD_throw(L, LUA_ERRSYNTAX);
  49. }
  50. static void luaX_error (LexState *ls, const char *s, const char *token) {
  51. luaX_errorline(ls, s, token, ls->linenumber);
  52. }
  53. void luaX_syntaxerror (LexState *ls, const char *msg) {
  54. const char *lasttoken;
  55. switch (ls->t.token) {
  56. case TK_NAME:
  57. lasttoken = getstr(ls->t.seminfo.ts);
  58. break;
  59. case TK_STRING:
  60. case TK_NUMBER:
  61. lasttoken = luaZ_buffer(ls->buff);
  62. break;
  63. default:
  64. lasttoken = luaX_token2str(ls, ls->t.token);
  65. break;
  66. }
  67. luaX_error(ls, msg, lasttoken);
  68. }
  69. const char *luaX_token2str (LexState *ls, int token) {
  70. if (token < FIRST_RESERVED) {
  71. lua_assert(token == (unsigned char)token);
  72. return luaO_pushfstring(ls->L, "%c", token);
  73. }
  74. else
  75. return token2string[token-FIRST_RESERVED];
  76. }
  77. static void luaX_lexerror (LexState *ls, const char *s, int token) {
  78. if (token == TK_EOS)
  79. luaX_error(ls, s, luaX_token2str(ls, token));
  80. else
  81. luaX_error(ls, s, luaZ_buffer(ls->buff));
  82. }
  83. static void inclinenumber (LexState *LS) {
  84. next(LS); /* skip `\n' */
  85. ++LS->linenumber;
  86. luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk");
  87. }
  88. void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) {
  89. LS->L = L;
  90. LS->lookahead.token = TK_EOS; /* no look-ahead token */
  91. LS->z = z;
  92. LS->fs = NULL;
  93. LS->linenumber = 1;
  94. LS->lastline = 1;
  95. LS->source = source;
  96. next(LS); /* read first char */
  97. if (LS->current == '#') {
  98. do { /* skip first line */
  99. next(LS);
  100. } while (LS->current != '\n' && LS->current != EOZ);
  101. }
  102. }
  103. /*
  104. ** =======================================================
  105. ** LEXICAL ANALYZER
  106. ** =======================================================
  107. */
  /* use buffer to store names, literal strings and numbers */
  /* extra space to allocate when growing buffer */
  #define EXTRABUFF 32
  /* maximum number of chars that can be read without checking buffer size */
  #define MAXNOCHECK 5
  /*
  ** Make sure the lexer buffer has room for `len' chars plus MAXNOCHECK
  ** more; when it does not, ask luaZ_openspace for `len'+EXTRABUFF.
  ** Wrapped in do/while(0) so the macro is a single statement and cannot
  ** capture an `else' that follows it at the call site. `len' is
  ** evaluated more than once, so it must have no side effects.
  */
  #define checkbuffer(LS, len) \
    do { \
      if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \
        luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF); \
    } while (0)
  /*
  ** Store char `c' at index `l' of the lexer buffer and increment `l'
  ** (which must be a modifiable lvalue). No bounds check is done here;
  ** callers rely on a preceding checkbuffer.
  */
  #define save(LS, c, l) \
    (luaZ_buffer((LS)->buff)[(l)++] = cast(char, c))
  /* save the current character and then advance to the next one */
  #define save_and_next(LS, l) (save(LS, (LS)->current, l), next(LS))
  119. static size_t readname (LexState *LS) {
  120. size_t l = 0;
  121. checkbuffer(LS, l);
  122. do {
  123. checkbuffer(LS, l);
  124. save_and_next(LS, l);
  125. } while (isalnum(LS->current) || LS->current == '_');
  126. save(LS, '\0', l);
  127. return l-1;
  128. }
  129. /* LUA_NUMBER */
  130. static void read_numeral (LexState *LS, int comma, SemInfo *seminfo) {
  131. size_t l = 0;
  132. checkbuffer(LS, l);
  133. if (comma) save(LS, '.', l);
  134. while (isdigit(LS->current)) {
  135. checkbuffer(LS, l);
  136. save_and_next(LS, l);
  137. }
  138. if (LS->current == '.') {
  139. save_and_next(LS, l);
  140. if (LS->current == '.') {
  141. save_and_next(LS, l);
  142. save(LS, '\0', l);
  143. luaX_lexerror(LS,
  144. "ambiguous syntax (decimal point x string concatenation)",
  145. TK_NUMBER);
  146. }
  147. }
  148. while (isdigit(LS->current)) {
  149. checkbuffer(LS, l);
  150. save_and_next(LS, l);
  151. }
  152. if (LS->current == 'e' || LS->current == 'E') {
  153. save_and_next(LS, l); /* read `E' */
  154. if (LS->current == '+' || LS->current == '-')
  155. save_and_next(LS, l); /* optional exponent sign */
  156. while (isdigit(LS->current)) {
  157. checkbuffer(LS, l);
  158. save_and_next(LS, l);
  159. }
  160. }
  161. save(LS, '\0', l);
  162. if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r))
  163. luaX_lexerror(LS, "malformed number", TK_NUMBER);
  164. }
  165. static void read_long_string (LexState *LS, SemInfo *seminfo) {
  166. int cont = 0;
  167. size_t l = 0;
  168. checkbuffer(LS, l);
  169. save(LS, '[', l); /* save first `[' */
  170. save_and_next(LS, l); /* pass the second `[' */
  171. if (LS->current == '\n') /* string starts with a newline? */
  172. inclinenumber(LS); /* skip it */
  173. for (;;) {
  174. checkbuffer(LS, l);
  175. switch (LS->current) {
  176. case EOZ:
  177. save(LS, '\0', l);
  178. luaX_lexerror(LS, (seminfo) ? "unfinished long string" :
  179. "unfinished long comment", TK_EOS);
  180. break; /* to avoid warnings */
  181. case '[':
  182. save_and_next(LS, l);
  183. if (LS->current == '[') {
  184. cont++;
  185. save_and_next(LS, l);
  186. }
  187. continue;
  188. case ']':
  189. save_and_next(LS, l);
  190. if (LS->current == ']') {
  191. if (cont == 0) goto endloop;
  192. cont--;
  193. save_and_next(LS, l);
  194. }
  195. continue;
  196. case '\n':
  197. save(LS, '\n', l);
  198. inclinenumber(LS);
  199. if (!seminfo) l = 0; /* reset buffer to avoid wasting space */
  200. continue;
  201. default:
  202. save_and_next(LS, l);
  203. }
  204. } endloop:
  205. save_and_next(LS, l); /* skip the second `]' */
  206. save(LS, '\0', l);
  207. if (seminfo)
  208. seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 2, l - 5);
  209. }
  210. static void read_string (LexState *LS, int del, SemInfo *seminfo) {
  211. size_t l = 0;
  212. checkbuffer(LS, l);
  213. save_and_next(LS, l);
  214. while (LS->current != del) {
  215. checkbuffer(LS, l);
  216. switch (LS->current) {
  217. case EOZ:
  218. save(LS, '\0', l);
  219. luaX_lexerror(LS, "unfinished string", TK_EOS);
  220. break; /* to avoid warnings */
  221. case '\n':
  222. save(LS, '\0', l);
  223. luaX_lexerror(LS, "unfinished string", TK_STRING);
  224. break; /* to avoid warnings */
  225. case '\\':
  226. next(LS); /* do not save the `\' */
  227. switch (LS->current) {
  228. case 'a': save(LS, '\a', l); next(LS); break;
  229. case 'b': save(LS, '\b', l); next(LS); break;
  230. case 'f': save(LS, '\f', l); next(LS); break;
  231. case 'n': save(LS, '\n', l); next(LS); break;
  232. case 'r': save(LS, '\r', l); next(LS); break;
  233. case 't': save(LS, '\t', l); next(LS); break;
  234. case 'v': save(LS, '\v', l); next(LS); break;
  235. case '\n': save(LS, '\n', l); inclinenumber(LS); break;
  236. case EOZ: break; /* will raise an error next loop */
  237. default: {
  238. if (!isdigit(LS->current))
  239. save_and_next(LS, l); /* handles \\, \", \', and \? */
  240. else { /* \xxx */
  241. int c = 0;
  242. int i = 0;
  243. do {
  244. c = 10*c + (LS->current-'0');
  245. next(LS);
  246. } while (++i<3 && isdigit(LS->current));
  247. if (c > UCHAR_MAX) {
  248. save(LS, '\0', l);
  249. luaX_lexerror(LS, "escape sequence too large", TK_STRING);
  250. }
  251. save(LS, c, l);
  252. }
  253. }
  254. }
  255. break;
  256. default:
  257. save_and_next(LS, l);
  258. }
  259. }
  260. save_and_next(LS, l); /* skip delimiter */
  261. save(LS, '\0', l);
  262. seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 1, l - 3);
  263. }
  264. int luaX_lex (LexState *LS, SemInfo *seminfo) {
  265. for (;;) {
  266. switch (LS->current) {
  267. case '\n': {
  268. inclinenumber(LS);
  269. continue;
  270. }
  271. case '-': {
  272. next(LS);
  273. if (LS->current != '-') return '-';
  274. /* else is a comment */
  275. next(LS);
  276. if (LS->current == '[' && (next(LS), LS->current == '['))
  277. read_long_string(LS, NULL); /* long comment */
  278. else /* short comment */
  279. while (LS->current != '\n' && LS->current != EOZ)
  280. next(LS);
  281. continue;
  282. }
  283. case '[': {
  284. next(LS);
  285. if (LS->current != '[') return '[';
  286. else {
  287. read_long_string(LS, seminfo);
  288. return TK_STRING;
  289. }
  290. }
  291. case '=': {
  292. next(LS);
  293. if (LS->current != '=') return '=';
  294. else { next(LS); return TK_EQ; }
  295. }
  296. case '<': {
  297. next(LS);
  298. if (LS->current != '=') return '<';
  299. else { next(LS); return TK_LE; }
  300. }
  301. case '>': {
  302. next(LS);
  303. if (LS->current != '=') return '>';
  304. else { next(LS); return TK_GE; }
  305. }
  306. case '~': {
  307. next(LS);
  308. if (LS->current != '=') return '~';
  309. else { next(LS); return TK_NE; }
  310. }
  311. case '"':
  312. case '\'': {
  313. read_string(LS, LS->current, seminfo);
  314. return TK_STRING;
  315. }
  316. case '.': {
  317. next(LS);
  318. if (LS->current == '.') {
  319. next(LS);
  320. if (LS->current == '.') {
  321. next(LS);
  322. return TK_DOTS; /* ... */
  323. }
  324. else return TK_CONCAT; /* .. */
  325. }
  326. else if (!isdigit(LS->current)) return '.';
  327. else {
  328. read_numeral(LS, 1, seminfo);
  329. return TK_NUMBER;
  330. }
  331. }
  332. case EOZ: {
  333. return TK_EOS;
  334. }
  335. default: {
  336. if (isspace(LS->current)) {
  337. next(LS);
  338. continue;
  339. }
  340. else if (isdigit(LS->current)) {
  341. read_numeral(LS, 0, seminfo);
  342. return TK_NUMBER;
  343. }
  344. else if (isalpha(LS->current) || LS->current == '_') {
  345. /* identifier or reserved word */
  346. size_t l = readname(LS);
  347. TString *ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), l);
  348. if (ts->tsv.reserved > 0) /* reserved word? */
  349. return ts->tsv.reserved - 1 + FIRST_RESERVED;
  350. seminfo->ts = ts;
  351. return TK_NAME;
  352. }
  353. else {
  354. int c = LS->current;
  355. if (iscntrl(c))
  356. luaX_error(LS, "invalid control char",
  357. luaO_pushfstring(LS->L, "char(%d)", c));
  358. next(LS);
  359. return c; /* single-char tokens (+ - / ...) */
  360. }
  361. }
  362. }
  363. }
  364. }
  365. #undef next