lj_bcread.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. /*
  2. ** Bytecode reader.
  3. ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
  4. */
  5. #define lj_bcread_c
  6. #define LUA_CORE
  7. #include "lj_obj.h"
  8. #include "lj_gc.h"
  9. #include "lj_err.h"
  10. #include "lj_buf.h"
  11. #include "lj_str.h"
  12. #include "lj_tab.h"
  13. #include "lj_bc.h"
  14. #if LJ_HASFFI
  15. #include "lj_ctype.h"
  16. #include "lj_cdata.h"
  17. #include "lualib.h"
  18. #endif
  19. #include "lj_lex.h"
  20. #include "lj_bcdump.h"
  21. #include "lj_state.h"
  22. #include "lj_strfmt.h"
  23. /* Reuse some lexer fields for our own purposes. */
  24. #define bcread_flags(ls) ls->level
  25. #define bcread_swap(ls) \
  26. ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
  27. #define bcread_oldtop(L, ls) restorestack(L, ls->lastline)
  28. #define bcread_savetop(L, ls, top) \
  29. ls->lastline = (BCLine)savestack(L, (top))
  30. /* -- Input buffer handling ----------------------------------------------- */
  31. /* Throw reader error. */
  32. static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
  33. {
  34. lua_State *L = ls->L;
  35. const char *name = ls->chunkarg;
  36. if (*name == BCDUMP_HEAD1) name = "(binary)";
  37. else if (*name == '@' || *name == '=') name++;
  38. lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
  39. lj_err_throw(L, LUA_ERRSYNTAX);
  40. }
  41. /* Refill buffer. */
  42. static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
  43. {
  44. lj_assertLS(len != 0, "empty refill");
  45. if (len > LJ_MAX_BUF || ls->c < 0)
  46. bcread_error(ls, LJ_ERR_BCBAD);
  47. do {
  48. const char *buf;
  49. size_t sz;
  50. char *p = ls->sb.b;
  51. MSize n = (MSize)(ls->pe - ls->p);
  52. if (n) { /* Copy remainder to buffer. */
  53. if (sbuflen(&ls->sb)) { /* Move down in buffer. */
  54. lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
  55. if (ls->p != p) memmove(p, ls->p, n);
  56. } else { /* Copy from buffer provided by reader. */
  57. p = lj_buf_need(&ls->sb, len);
  58. memcpy(p, ls->p, n);
  59. }
  60. ls->p = p;
  61. ls->pe = p + n;
  62. }
  63. ls->sb.w = p + n;
  64. buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
  65. if (buf == NULL || sz == 0) { /* EOF? */
  66. if (need) bcread_error(ls, LJ_ERR_BCBAD);
  67. ls->c = -1; /* Only bad if we get called again. */
  68. break;
  69. }
  70. if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
  71. if (n) { /* Append to buffer. */
  72. n += (MSize)sz;
  73. p = lj_buf_need(&ls->sb, n < len ? len : n);
  74. memcpy(ls->sb.w, buf, sz);
  75. ls->sb.w = p + n;
  76. ls->p = p;
  77. ls->pe = p + n;
  78. } else { /* Return buffer provided by reader. */
  79. ls->p = buf;
  80. ls->pe = buf + sz;
  81. }
  82. } while ((MSize)(ls->pe - ls->p) < len);
  83. }
  84. /* Need a certain number of bytes. */
  85. static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
  86. {
  87. if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
  88. bcread_fill(ls, len, 1);
  89. }
  90. /* Want to read up to a certain number of bytes, but may need less. */
  91. static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
  92. {
  93. if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
  94. bcread_fill(ls, len, 0);
  95. }
  96. /* Return memory block from buffer. */
  97. static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
  98. {
  99. uint8_t *p = (uint8_t *)ls->p;
  100. ls->p += len;
  101. lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
  102. return p;
  103. }
  104. /* Copy memory block from buffer. */
  105. static void bcread_block(LexState *ls, void *q, MSize len)
  106. {
  107. memcpy(q, bcread_mem(ls, len), len);
  108. }
  109. /* Read byte from buffer. */
  110. static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
  111. {
  112. lj_assertLS(ls->p < ls->pe, "buffer read overflow");
  113. return (uint32_t)(uint8_t)*ls->p++;
  114. }
  115. /* Read ULEB128 value from buffer. */
  116. static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
  117. {
  118. uint32_t v = lj_buf_ruleb128(&ls->p);
  119. lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
  120. return v;
  121. }
  122. /* Read top 32 bits of 33 bit ULEB128 value from buffer. */
  123. static uint32_t bcread_uleb128_33(LexState *ls)
  124. {
  125. const uint8_t *p = (const uint8_t *)ls->p;
  126. uint32_t v = (*p++ >> 1);
  127. if (LJ_UNLIKELY(v >= 0x40)) {
  128. int sh = -1;
  129. v &= 0x3f;
  130. do {
  131. v |= ((*p & 0x7f) << (sh += 7));
  132. } while (*p++ >= 0x80);
  133. }
  134. ls->p = (char *)p;
  135. lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
  136. return v;
  137. }
  138. /* -- Bytecode reader ----------------------------------------------------- */
  139. /* Read debug info of a prototype. */
  140. static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
  141. {
  142. void *lineinfo = (void *)proto_lineinfo(pt);
  143. bcread_block(ls, lineinfo, sizedbg);
  144. /* Swap lineinfo if the endianess differs. */
  145. if (bcread_swap(ls) && pt->numline >= 256) {
  146. MSize i, n = pt->sizebc-1;
  147. if (pt->numline < 65536) {
  148. uint16_t *p = (uint16_t *)lineinfo;
  149. for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8));
  150. } else {
  151. uint32_t *p = (uint32_t *)lineinfo;
  152. for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]);
  153. }
  154. }
  155. }
  156. /* Find pointer to varinfo. */
  157. static const void *bcread_varinfo(GCproto *pt)
  158. {
  159. const uint8_t *p = proto_uvinfo(pt);
  160. MSize n = pt->sizeuv;
  161. if (n) while (*p++ || --n) ;
  162. return p;
  163. }
  164. /* Read a single constant key/value of a template table. */
  165. static void bcread_ktabk(LexState *ls, TValue *o)
  166. {
  167. MSize tp = bcread_uleb128(ls);
  168. if (tp >= BCDUMP_KTAB_STR) {
  169. MSize len = tp - BCDUMP_KTAB_STR;
  170. const char *p = (const char *)bcread_mem(ls, len);
  171. setstrV(ls->L, o, lj_str_new(ls->L, p, len));
  172. } else if (tp == BCDUMP_KTAB_INT) {
  173. setintV(o, (int32_t)bcread_uleb128(ls));
  174. } else if (tp == BCDUMP_KTAB_NUM) {
  175. o->u32.lo = bcread_uleb128(ls);
  176. o->u32.hi = bcread_uleb128(ls);
  177. } else {
  178. lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
  179. setpriV(o, ~tp);
  180. }
  181. }
  182. /* Read a template table. */
  183. static GCtab *bcread_ktab(LexState *ls)
  184. {
  185. MSize narray = bcread_uleb128(ls);
  186. MSize nhash = bcread_uleb128(ls);
  187. GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash));
  188. if (narray) { /* Read array entries. */
  189. MSize i;
  190. TValue *o = tvref(t->array);
  191. for (i = 0; i < narray; i++, o++)
  192. bcread_ktabk(ls, o);
  193. }
  194. if (nhash) { /* Read hash entries. */
  195. MSize i;
  196. for (i = 0; i < nhash; i++) {
  197. TValue key;
  198. bcread_ktabk(ls, &key);
  199. lj_assertLS(!tvisnil(&key), "nil key");
  200. bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
  201. }
  202. }
  203. return t;
  204. }
  205. /* Read GC constants of a prototype. */
  206. static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
  207. {
  208. MSize i;
  209. GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;
  210. for (i = 0; i < sizekgc; i++, kr++) {
  211. MSize tp = bcread_uleb128(ls);
  212. if (tp >= BCDUMP_KGC_STR) {
  213. MSize len = tp - BCDUMP_KGC_STR;
  214. const char *p = (const char *)bcread_mem(ls, len);
  215. setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len)));
  216. } else if (tp == BCDUMP_KGC_TAB) {
  217. setgcref(*kr, obj2gco(bcread_ktab(ls)));
  218. #if LJ_HASFFI
  219. } else if (tp != BCDUMP_KGC_CHILD) {
  220. CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE :
  221. tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64;
  222. CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8;
  223. GCcdata *cd = lj_cdata_new_(ls->L, id, sz);
  224. TValue *p = (TValue *)cdataptr(cd);
  225. setgcref(*kr, obj2gco(cd));
  226. p[0].u32.lo = bcread_uleb128(ls);
  227. p[0].u32.hi = bcread_uleb128(ls);
  228. if (tp == BCDUMP_KGC_COMPLEX) {
  229. p[1].u32.lo = bcread_uleb128(ls);
  230. p[1].u32.hi = bcread_uleb128(ls);
  231. }
  232. #endif
  233. } else {
  234. lua_State *L = ls->L;
  235. lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp);
  236. if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
  237. bcread_error(ls, LJ_ERR_BCBAD);
  238. L->top--;
  239. setgcref(*kr, obj2gco(protoV(L->top)));
  240. }
  241. }
  242. }
  243. /* Read number constants of a prototype. */
  244. static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
  245. {
  246. MSize i;
  247. TValue *o = mref(pt->k, TValue);
  248. for (i = 0; i < sizekn; i++, o++) {
  249. int isnum = (ls->p[0] & 1);
  250. uint32_t lo = bcread_uleb128_33(ls);
  251. if (isnum) {
  252. o->u32.lo = lo;
  253. o->u32.hi = bcread_uleb128(ls);
  254. } else {
  255. setintV(o, lo);
  256. }
  257. }
  258. }
  259. /* Read bytecode instructions. */
  260. static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
  261. {
  262. BCIns *bc = proto_bc(pt);
  263. BCIns op;
  264. if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
  265. else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV;
  266. else op = BC_FUNCF;
  267. bc[0] = BCINS_AD(op, pt->framesize, 0);
  268. bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
  269. /* Swap bytecode instructions if the endianess differs. */
  270. if (bcread_swap(ls)) {
  271. MSize i;
  272. for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]);
  273. }
  274. }
  275. /* Read upvalue refs. */
  276. static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
  277. {
  278. if (sizeuv) {
  279. uint16_t *uv = proto_uv(pt);
  280. bcread_block(ls, uv, sizeuv*2);
  281. /* Swap upvalue refs if the endianess differs. */
  282. if (bcread_swap(ls)) {
  283. MSize i;
  284. for (i = 0; i < sizeuv; i++)
  285. uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8));
  286. }
  287. }
  288. }
  289. /* Read a prototype. */
  290. GCproto *lj_bcread_proto(LexState *ls)
  291. {
  292. GCproto *pt;
  293. MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
  294. MSize ofsk, ofsuv, ofsdbg;
  295. MSize sizedbg = 0;
  296. BCLine firstline = 0, numline = 0;
  297. /* Read prototype header. */
  298. flags = bcread_byte(ls);
  299. numparams = bcread_byte(ls);
  300. framesize = bcread_byte(ls);
  301. sizeuv = bcread_byte(ls);
  302. sizekgc = bcread_uleb128(ls);
  303. sizekn = bcread_uleb128(ls);
  304. sizebc = bcread_uleb128(ls) + 1;
  305. if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
  306. sizedbg = bcread_uleb128(ls);
  307. if (sizedbg) {
  308. firstline = bcread_uleb128(ls);
  309. numline = bcread_uleb128(ls);
  310. }
  311. }
  312. /* Calculate total size of prototype including all colocated arrays. */
  313. sizept = (MSize)sizeof(GCproto) +
  314. sizebc*(MSize)sizeof(BCIns) +
  315. sizekgc*(MSize)sizeof(GCRef);
  316. sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1);
  317. ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue);
  318. ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2;
  319. ofsdbg = sizept; sizept += sizedbg;
  320. /* Allocate prototype object and initialize its fields. */
  321. pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept);
  322. pt->gct = ~LJ_TPROTO;
  323. pt->numparams = (uint8_t)numparams;
  324. pt->framesize = (uint8_t)framesize;
  325. pt->sizebc = sizebc;
  326. setmref(pt->k, (char *)pt + ofsk);
  327. setmref(pt->uv, (char *)pt + ofsuv);
  328. pt->sizekgc = 0; /* Set to zero until fully initialized. */
  329. pt->sizekn = sizekn;
  330. pt->sizept = sizept;
  331. pt->sizeuv = (uint8_t)sizeuv;
  332. pt->flags = (uint8_t)flags;
  333. pt->trace = 0;
  334. setgcref(pt->chunkname, obj2gco(ls->chunkname));
  335. /* Close potentially uninitialized gap between bc and kgc. */
  336. *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0;
  337. /* Read bytecode instructions and upvalue refs. */
  338. bcread_bytecode(ls, pt, sizebc);
  339. bcread_uv(ls, pt, sizeuv);
  340. /* Read constants. */
  341. bcread_kgc(ls, pt, sizekgc);
  342. pt->sizekgc = sizekgc;
  343. bcread_knum(ls, pt, sizekn);
  344. /* Read and initialize debug info. */
  345. pt->firstline = firstline;
  346. pt->numline = numline;
  347. if (sizedbg) {
  348. MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2);
  349. setmref(pt->lineinfo, (char *)pt + ofsdbg);
  350. setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);
  351. bcread_dbg(ls, pt, sizedbg);
  352. setmref(pt->varinfo, bcread_varinfo(pt));
  353. } else {
  354. setmref(pt->lineinfo, NULL);
  355. setmref(pt->uvinfo, NULL);
  356. setmref(pt->varinfo, NULL);
  357. }
  358. return pt;
  359. }
  360. /* Read and check header of bytecode dump. */
  361. static int bcread_header(LexState *ls)
  362. {
  363. uint32_t flags;
  364. bcread_want(ls, 3+5+5);
  365. if (bcread_byte(ls) != BCDUMP_HEAD2 ||
  366. bcread_byte(ls) != BCDUMP_HEAD3 ||
  367. bcread_byte(ls) != BCDUMP_VERSION) return 0;
  368. bcread_flags(ls) = flags = bcread_uleb128(ls);
  369. if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
  370. if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0;
  371. if ((flags & BCDUMP_F_FFI)) {
  372. #if LJ_HASFFI
  373. lua_State *L = ls->L;
  374. ctype_loadffi(L);
  375. #else
  376. return 0;
  377. #endif
  378. }
  379. if ((flags & BCDUMP_F_STRIP)) {
  380. ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
  381. } else {
  382. MSize len = bcread_uleb128(ls);
  383. bcread_need(ls, len);
  384. ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
  385. }
  386. return 1; /* Ok. */
  387. }
  388. /* Read a bytecode dump. */
  389. GCproto *lj_bcread(LexState *ls)
  390. {
  391. lua_State *L = ls->L;
  392. lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header");
  393. bcread_savetop(L, ls, L->top);
  394. lj_buf_reset(&ls->sb);
  395. /* Check for a valid bytecode dump header. */
  396. if (!bcread_header(ls))
  397. bcread_error(ls, LJ_ERR_BCFMT);
  398. for (;;) { /* Process all prototypes in the bytecode dump. */
  399. GCproto *pt;
  400. MSize len;
  401. const char *startp;
  402. /* Read length. */
  403. if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
  404. ls->p++;
  405. break;
  406. }
  407. bcread_want(ls, 5);
  408. len = bcread_uleb128(ls);
  409. if (!len) break; /* EOF */
  410. bcread_need(ls, len);
  411. startp = ls->p;
  412. pt = lj_bcread_proto(ls);
  413. if (ls->p != startp + len)
  414. bcread_error(ls, LJ_ERR_BCBAD);
  415. setprotoV(L, L->top, pt);
  416. incr_top(L);
  417. }
  418. if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
  419. bcread_error(ls, LJ_ERR_BCBAD);
  420. /* Pop off last prototype. */
  421. L->top--;
  422. return protoV(L->top);
  423. }