Просмотр исходного кода

String buffers, part 3d: Compile string buffer methods and functions.

Sponsored by fmad.io.
Mike Pall 4 лет назад
Родитель
Сommit
29bc1f04ac
18 измененных файлов с 625 добавлено и 87 удалено
  1. 6 6
      src/Makefile.dep
  2. 2 0
      src/host/buildvm_lib.c
  3. 21 31
      src/lib_buffer.c
  4. 34 2
      src/lj_buf.c
  5. 11 1
      src/lj_buf.h
  6. 16 1
      src/lj_crecord.c
  7. 4 0
      src/lj_crecord.h
  8. 19 17
      src/lj_err.c
  9. 335 10
      src/lj_ffrecord.c
  10. 1 0
      src/lj_ir.c
  11. 7 2
      src/lj_ir.h
  12. 18 0
      src/lj_ircall.h
  13. 1 0
      src/lj_iropt.h
  14. 46 11
      src/lj_opt_fold.c
  15. 26 2
      src/lj_opt_mem.c
  16. 10 0
      src/lj_record.c
  17. 62 3
      src/lj_serialize.c
  18. 6 1
      src/lj_serialize.h

+ 6 - 6
src/Makefile.dep

@@ -114,10 +114,10 @@ lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_strfmt.h
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
+ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
+ lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h lj_vm.h
@@ -131,7 +131,7 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
- lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_prng.h
+ lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
  lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
@@ -185,7 +185,7 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
 lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
- lj_udata.h lj_ctype.h lj_cdata.h lj_serialize.h
+ lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \

+ 2 - 0
src/host/buildvm_lib.c

@@ -385,6 +385,8 @@ void emit_lib(BuildCtx *ctx)
 	  ok = LJ_HASJIT;
 	else if (!strcmp(buf, "#if LJ_HASFFI\n"))
 	  ok = LJ_HASFFI;
+	else if (!strcmp(buf, "#if LJ_HASBUFFER\n"))
+	  ok = LJ_HASBUFFER;
 	if (!ok) {
 	  int lvl = 1;
 	  while (fgets(buf, sizeof(buf), fp) != NULL) {

+ 21 - 31
src/lib_buffer.c

@@ -61,7 +61,7 @@ LJLIB_CF(buffer_method_free)
   return 1;
 }
 
-LJLIB_CF(buffer_method_reset)
+LJLIB_CF(buffer_method_reset)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   lj_bufx_reset(sbx);
@@ -69,7 +69,7 @@ LJLIB_CF(buffer_method_reset)
   return 1;
 }
 
-LJLIB_CF(buffer_method_skip)
+LJLIB_CF(buffer_method_skip)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
@@ -83,7 +83,7 @@ LJLIB_CF(buffer_method_skip)
   return 1;
 }
 
-LJLIB_CF(buffer_method_set)
+LJLIB_CF(buffer_method_set)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   GCobj *ref;
@@ -111,7 +111,7 @@ LJLIB_CF(buffer_method_set)
   return 1;
 }
 
-LJLIB_CF(buffer_method_put)
+LJLIB_CF(buffer_method_put)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobufw(L);
   ptrdiff_t arg, narg = L->top - L->base;
@@ -147,7 +147,7 @@ LJLIB_CF(buffer_method_put)
   return 1;
 }
 
-LJLIB_CF(buffer_method_putf)
+LJLIB_CF(buffer_method_putf)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobufw(L);
   lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
@@ -156,7 +156,7 @@ LJLIB_CF(buffer_method_putf)
   return 1;
 }
 
-LJLIB_CF(buffer_method_get)
+LJLIB_CF(buffer_method_get)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   ptrdiff_t arg, narg = L->top - L->base;
@@ -179,7 +179,7 @@ LJLIB_CF(buffer_method_get)
 }
 
 #if LJ_HASFFI
-LJLIB_CF(buffer_method_putcdata)
+LJLIB_CF(buffer_method_putcdata)	LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobufw(L);
   const char *p;
@@ -197,12 +197,12 @@ LJLIB_CF(buffer_method_putcdata)
   return 1;
 }
 
-LJLIB_CF(buffer_method_reserve)
+LJLIB_CF(buffer_method_reserve)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobufw(L);
-  MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+  MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
   GCcdata *cd;
-  lj_buf_more((SBuf *)sbx, len);
+  lj_buf_more((SBuf *)sbx, sz);
   ctype_loadffi(L);
   cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
   *(void **)cdataptr(cd) = sbx->w;
@@ -211,7 +211,7 @@ LJLIB_CF(buffer_method_reserve)
   return 2;
 }
 
-LJLIB_CF(buffer_method_commit)
+LJLIB_CF(buffer_method_commit)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
@@ -221,7 +221,7 @@ LJLIB_CF(buffer_method_commit)
   return 1;
 }
 
-LJLIB_CF(buffer_method_ref)
+LJLIB_CF(buffer_method_ref)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   GCcdata *cd;
@@ -234,7 +234,7 @@ LJLIB_CF(buffer_method_ref)
 }
 #endif
 
-LJLIB_CF(buffer_method_encode)
+LJLIB_CF(buffer_method_encode)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobufw(L);
   cTValue *o = lj_lib_checkany(L, 2);
@@ -244,11 +244,11 @@ LJLIB_CF(buffer_method_encode)
   return 1;
 }
 
-LJLIB_CF(buffer_method_decode)
+LJLIB_CF(buffer_method_decode)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobufw(L);
   setnilV(L->top++);
-  lj_serialize_get(sbx, L->top-1);
+  sbx->r = lj_serialize_get(sbx, L->top-1);
   lj_gc_check(L);
   return 1;
 }
@@ -260,7 +260,7 @@ LJLIB_CF(buffer_method___gc)
   return 0;
 }
 
-LJLIB_CF(buffer_method___tostring)
+LJLIB_CF(buffer_method___tostring)	LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));
@@ -268,7 +268,7 @@ LJLIB_CF(buffer_method___tostring)
   return 1;
 }
 
-LJLIB_CF(buffer_method___len)
+LJLIB_CF(buffer_method___len)		LJLIB_REC(.)
 {
   SBufExt *sbx = buffer_tobuf(L);
   setintV(L->top-1, (int32_t)sbufxlen(sbx));
@@ -317,29 +317,19 @@ LJLIB_CF(buffer_new)
   return 1;
 }
 
-LJLIB_CF(buffer_encode)
+LJLIB_CF(buffer_encode)			LJLIB_REC(.)
 {
   cTValue *o = lj_lib_checkany(L, 1);
-  SBufExt sbx;
-  memset(&sbx, 0, sizeof(SBufExt));
-  lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
-  lj_serialize_put(&sbx, o);
-  setstrV(L, L->top++, lj_buf_str(L, (SBuf *)&sbx));
+  setstrV(L, L->top++, lj_serialize_encode(L, o));
   lj_gc_check(L);
   return 1;
 }
 
-LJLIB_CF(buffer_decode)
+LJLIB_CF(buffer_decode)			LJLIB_REC(.)
 {
   GCstr *str = lj_lib_checkstrx(L, 1);
-  SBufExt sbx;
-  memset(&sbx, 0, sizeof(SBufExt));
-  lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
-  /* No need to set sbx.cowref here. */
   setnilV(L->top++);
-  lj_serialize_get(&sbx, L->top-1);
-  lj_gc_check(L);
-  if (sbx.r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);
+  lj_serialize_decode(L, L->top-1, str);
   return 1;
 }
 

+ 34 - 2
src/lj_buf.c

@@ -108,6 +108,25 @@ char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
   return lj_buf_need(sb, sz);
 }
 
+#if LJ_HASBUFFER && LJ_HASJIT
+void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref)
+{
+  lua_State *L = sbufL(sbx);
+  lj_bufx_free(L, sbx);
+  lj_bufx_set_cow(L, sbx, p, len);
+  setgcref(sbx->cowref, ref);
+  lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref);
+}
+
+#if LJ_HASFFI
+MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz)
+{
+  lj_buf_more((SBuf *)sbx, sz);
+  return sbufleft(sbx);
+}
+#endif
+#endif
+
 /* -- Low-level buffer put operations ------------------------------------- */
 
 SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
@@ -118,14 +137,27 @@ SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
   return sb;
 }
 
-SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+#if LJ_HASJIT || LJ_HASFFI
+static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c)
 {
-  char *w = lj_buf_more(sb, 1);
+  char *w = lj_buf_more2(sb, 1);
   *w++ = (char)c;
   sb->w = w;
   return sb;
 }
 
+SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+{
+  char *w = sb->w;
+  if (LJ_LIKELY(w < sb->e)) {
+    *w++ = (char)c;
+    sb->w = w;
+    return sb;
+  }
+  return lj_buf_putchar2(sb, c);
+}
+#endif
+
 SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;

+ 11 - 1
src/lj_buf.h

@@ -55,6 +55,7 @@ typedef struct SBufExt {
 #define sbufisext(sb)		(sbufflag((sb)) & SBUF_FLAG_EXT)
 #define sbufiscow(sb)		(sbufflag((sb)) & SBUF_FLAG_COW)
 #define sbufisborrow(sb)	(sbufflag((sb)) & SBUF_FLAG_BORROW)
+#define sbufiscoworborrow(sb)	(sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
 #define sbufX(sb) \
   (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
 #define setsbufflag(sb, flag)	(setmrefu((sb)->L, (flag)))
@@ -143,15 +144,24 @@ static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
 
 static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
 {
-  if (!sbufiscow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
+  if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
   setsbufXL(sbx, L, SBUF_FLAG_EXT);
   setgcrefnull(sbx->cowref);
   sbx->r = sbx->w = sbx->b = sbx->e = NULL;
 }
 
+#if LJ_HASBUFFER && LJ_HASJIT
+LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o);
+#if LJ_HASFFI
+LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz);
+#endif
+#endif
+
 /* Low-level buffer put operations */
 LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
+#if LJ_HASJIT || LJ_HASFFI
 LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
+#endif
 LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
 
 static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)

+ 16 - 1
src/lj_crecord.c

@@ -621,7 +621,7 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
       emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype));
       sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp,
 		  ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE :
-						 IRFL_UDATA_BUF_R);
+						 IRFL_SBUF_R);
     } else {
       sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata)));
     }
@@ -1918,10 +1918,25 @@ TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o)
   CTypeID id = argv2cdata(J, tr, o)->ctypeid;
   if (!(id == CTID_INT64 || id == CTID_UINT64))
     lj_trace_err(J, LJ_TRERR_BADTYPE);
+  lj_needsplit(J);
   return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr,
 		IRFL_CDATA_INT64);
 }
 
+#if LJ_HASBUFFER
+TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE);
+  return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o);
+}
+
+TRef lj_crecord_topuint8(jit_State *J, TRef tr)
+{
+  return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr);
+}
+#endif
+
 #undef IR
 #undef emitir
 #undef emitconv

+ 4 - 0
src/lj_crecord.h

@@ -34,6 +34,10 @@ LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
 
 LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
 LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o);
+#if LJ_HASBUFFER
+LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o);
+LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr);
+#endif
 #endif
 
 #endif

+ 19 - 17
src/lj_err.c

@@ -941,25 +941,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
 /* Error in context of caller. */
 LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
 {
-  TValue *frame = L->base-1;
-  TValue *pframe = NULL;
-  if (frame_islua(frame)) {
-    pframe = frame_prevl(frame);
-  } else if (frame_iscont(frame)) {
-    if (frame_iscont_fficb(frame)) {
-      pframe = frame;
-      frame = NULL;
-    } else {
-      pframe = frame_prevd(frame);
+  TValue *frame = NULL, *pframe = NULL;
+  if (!(LJ_HASJIT && tvref(G(L)->jit_base))) {
+    frame = L->base-1;
+    if (frame_islua(frame)) {
+      pframe = frame_prevl(frame);
+    } else if (frame_iscont(frame)) {
+      if (frame_iscont_fficb(frame)) {
+	pframe = frame;
+	frame = NULL;
+      } else {
+	pframe = frame_prevd(frame);
 #if LJ_HASFFI
-      /* Remove frame for FFI metamethods. */
-      if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
-	  frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
-	L->base = pframe+1;
-	L->top = frame;
-	setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
-      }
+	/* Remove frame for FFI metamethods. */
+	if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
+	    frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
+	  L->base = pframe+1;
+	  L->top = frame;
+	  setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
+	}
 #endif
+      }
     }
   }
   lj_debug_addloc(L, msg, pframe, frame);

+ 335 - 10
src/lj_ffrecord.c

@@ -11,6 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
@@ -28,6 +29,7 @@
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strfmt.h"
+#include "lj_serialize.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)			(&J->cur.ir[(ref)])
@@ -941,20 +943,18 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
   }
 }
 
-static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx)
 {
-  TRef trfmt = lj_ir_tostr(J, J->base[0]);
-  GCstr *fmt = argv2str(J, &rd->argv[0]);
-  int arg = 1;
-  TRef hdr, tr;
+  ptrdiff_t arg = sbufx;
+  TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]);
+  GCstr *fmt = argv2str(J, &rd->argv[arg]);
   FormatState fs;
   SFormat sf;
   /* Specialize to the format string. */
   emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
-  tr = hdr = recff_bufhdr(J);
   lj_strfmt_init(&fs, strdata(fmt), fmt->len);
   while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {  /* Parse format. */
-    TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
+    TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg];
     TRef trsf = lj_ir_kint(J, (int32_t)sf);
     IRCallID id;
     switch (STRFMT_TYPE(sf)) {
@@ -968,9 +968,8 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
       if (!tref_isinteger(tra)) {
 #if LJ_HASFFI
 	if (tref_iscdata(tra)) {
-	  tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg-1]);
+	  tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]);
 	  tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
-	  lj_needsplit(J);
 	  break;
 	}
 #endif
@@ -1004,6 +1003,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
     case STRFMT_STR:
       if (!tref_isstr(tra)) {
 	recff_nyiu(J, rd);  /* NYI: __tostring and non-string types for %s. */
+	/* NYI: also buffers. */
 	return;
       }
       if (sf == STRFMT_STR)  /* Shortcut for plain %s. */
@@ -1028,9 +1028,334 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
       return;
     }
   }
-  J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+  if (sbufx) {
+    emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+  } else {
+    J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+  }
+}
+
+static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+{
+  recff_format(J, rd, recff_bufhdr(J), 0);
+}
+
+/* -- Buffer library fast functions --------------------------------------- */
+
+#if LJ_HASBUFFER
+
+static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud)
+{
+  return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L);
+}
+
+static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val)
+{
+  TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L);
+  emitir(IRT(IR_FSTORE, IRT_PGC), fref, val);
+}
+
+static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl)
+{
+  return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl);
+}
+
+static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val)
+{
+  TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl);
+  emitir(IRT(IR_FSTORE, IRT_PTR), fref, val);
+}
+
+static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw)
+{
+  TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr);
+  if (LJ_64)
+    len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+  return len;
+}
+
+/* Emit typecheck for string buffer. */
+static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, int arg)
+{
+  TRef trtype, ud = J->base[arg];
+  if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE);
+  trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
+  emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER));
+  return ud;
+}
+
+/* Emit BUFHDR for write to extended string buffer. */
+static TRef recff_sbufx_write(jit_State *J, TRef ud)
+{
+  TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));
+  return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
+}
+
+/* Check for integer in range for the buffer API. */
+static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, int arg)
+{
+  TRef tr = J->base[arg];
+  TRef trlim = lj_ir_kint(J, LJ_MAX_BUF);
+  if (tref_isinteger(tr)) {
+    emitir(IRTGI(IR_ULE), tr, trlim);
+  } else if (tref_isnum(tr)) {
+    tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
+    emitir(IRTGI(IR_ULE), tr, trlim);
+#if LJ_HASFFI
+  } else if (tref_iscdata(tr)) {
+    tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]);
+    emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF));
+    tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE);
+#else
+    UNUSED(rd);
+#endif
+  } else {
+    lj_trace_err(J, LJ_TRERR_BADTYPE);
+  }
+  return tr;
+}
+
+static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  SBufExt *sbx = bufV(&rd->argv[0]);
+  int iscow = (int)sbufiscow(sbx);
+  TRef trl = recff_sbufx_get_L(J, ud);
+  TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));
+  TRef zero = lj_ir_kint(J, 0);
+  emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero);
+  if (iscow) {
+    trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
+		 LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
+			   lj_ir_kint(J, SBUF_FLAG_COW));
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
+    recff_sbufx_set_L(J, ud, trl);
+    emitir(IRT(IR_FSTORE, IRT_PGC),
+	   emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
+  } else {
+    TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb);
+  }
+}
+
+static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef len = recff_sbufx_len(J, trr, trw);
+  TRef trn = recff_sbufx_checkint(J, rd, 1);
+  len = emitir(IRTI(IR_MIN), len, trn);
+  trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+}
+
+static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr = J->base[1];
+  if (tref_isstr(tr)) {
+    TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
+    TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
+    lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
+#if LJ_HASFFI
+  } else if (tref_iscdata(tr)) {
+    TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]);
+    TRef len = recff_sbufx_checkint(J, rd, 2);
+    lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
+#endif
+  }  /* else: Interpreter will throw. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr;
+  ptrdiff_t arg;
+  if (!J->base[1]) return;
+  for (arg = 1; (tr = J->base[arg]); arg++) {
+    if (tref_isstr(tr)) {
+      trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr);
+    } else if (tref_isnumber(tr)) {
+      trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf,
+		     emitir(IRT(IR_TOSTR, IRT_STR), tr,
+			    tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT));
+    } else if (tref_isudata(tr)) {
+      TRef ud2 = recff_sbufx_check(J, rd, arg);
+      TRef trr = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_R);
+      TRef trw = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_W);
+      TRef len = recff_sbufx_len(J, trr, trw);
+      emitir(IRTG(IR_NE, IRT_PGC), ud, ud2);
+      trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len);
+    } else {
+      recff_nyiu(J, rd);
+    }
+  }
+  emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
+}
+
+static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  recff_format(J, rd, trbuf, 1);
 }
 
+static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef tr;
+  ptrdiff_t arg;
+  if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; }
+  for (arg = 0; (tr = J->base[arg+1]); arg++) {
+    TRef len = recff_sbufx_len(J, trr, trw);
+    if (tref_isnil(tr)) {
+      J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
+      trr = trw;
+    } else {
+      TRef trn = recff_sbufx_checkint(J, rd, arg+1);
+      TRef tru;
+      len = emitir(IRTI(IR_MIN), len, trn);
+      tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+      J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
+      trr = tru;  /* Doing the ADD before the SNEW generates better code. */
+    }
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+  }
+  rd->nres = arg;
+}
+
+static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw));
+}
+
+static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = recff_sbufx_len(J, trr, trw);
+}
+
+#if LJ_HASFFI
+static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]);
+  TRef len = recff_sbufx_checkint(J, rd, 2);
+  trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len);
+  emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
+}
+
+static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef trsz = recff_sbufx_checkint(J, rd, 1);
+  J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz);
+  J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W));
+  rd->nres = 2;
+}
+
+static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef len = recff_sbufx_checkint(J, rd, 1);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E);
+  TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw);
+  if (LJ_64)
+    left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+  emitir(IRTGI(IR_ULE), len, left);
+  trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len);
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw);
+}
+
+static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = lj_crecord_topuint8(J, trr);
+  J->base[1] = recff_sbufx_len(J, trr, trw);
+  rd->nres = 2;
+}
+#endif
+
+static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tmp, tr = J->base[1];
+  if (!LJ_DUALNUM && tref_isinteger(tr))
+    tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
+  tmp = emitir(IRT(IR_TMPREF, IRT_PGC), tr, IRTMPREF_IN1);
+  lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp);
+  /* No IR_USE needed, since the call is a store. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd)
+{
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef trr, tmp;
+  IRType t;
+  tmp = emitir(IRT(IR_TMPREF, IRT_PGC), REF_NIL, IRTMPREF_OUT1);
+  trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp);
+  /* No IR_USE needed, since the call is a store. */
+  t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0]));
+  J->base[0] = lj_record_vload(J, tmp, t);
+  /* The sbx->r store must be after the VLOAD type check, in case it fails. */
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+}
+
+static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd)
+{
+  TRef tmp, tr = J->base[0];
+  if (!LJ_DUALNUM && tref_isinteger(tr))
+    tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
+  tmp = emitir(IRT(IR_TMPREF, IRT_PGC), tr, IRTMPREF_IN1);
+  J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp);
+  /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */
+  emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0);
+  UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd)
+{
+  if (tvisstr(&rd->argv[0])) {
+    GCstr *str = strV(&rd->argv[0]);
+    SBufExt sbx;
+    TRef tr, tmp;
+    IRType t;
+    tmp = emitir(IRT(IR_TMPREF, IRT_PGC), REF_NIL, IRTMPREF_OUT1);
+    tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]);
+    /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM.
+    ** That's why IRCALL_lj_serialize_decode needs a fake INT result.
+    */
+    emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+    memset(&sbx, 0, sizeof(SBufExt));
+    lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len);
+    t = (IRType)lj_serialize_peektype(&sbx);
+    J->base[0] = lj_record_vload(J, tmp, t);
+  }  /* else: Interpreter will throw. */
+}
+
+#endif
+
 /* -- Table library fast functions ---------------------------------------- */
 
 static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)

+ 1 - 0
src/lj_ir.c

@@ -30,6 +30,7 @@
 #endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_serialize.h"
 #include "lj_strfmt.h"
 #include "lj_prng.h"
 

+ 7 - 2
src/lj_ir.h

@@ -205,10 +205,15 @@ IRFPMDEF(FPMENUM)
   _(UDATA_META,	offsetof(GCudata, metatable)) \
   _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
   _(UDATA_FILE,	sizeof(GCudata)) \
-  _(UDATA_BUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \
+  _(SBUF_W,	sizeof(GCudata) + offsetof(SBufExt, w)) \
+  _(SBUF_E,	sizeof(GCudata) + offsetof(SBufExt, e)) \
+  _(SBUF_B,	sizeof(GCudata) + offsetof(SBufExt, b)) \
+  _(SBUF_L,	sizeof(GCudata) + offsetof(SBufExt, L)) \
+  _(SBUF_REF,	sizeof(GCudata) + offsetof(SBufExt, cowref)) \
+  _(SBUF_R,	sizeof(GCudata) + offsetof(SBufExt, r)) \
   _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \
   _(CDATA_PTR,	sizeof(GCcdata)) \
-  _(CDATA_INT, sizeof(GCcdata)) \
+  _(CDATA_INT,	sizeof(GCcdata)) \
   _(CDATA_INT64, sizeof(GCcdata)) \
   _(CDATA_INT64_4, sizeof(GCcdata) + 4)
 

+ 18 - 0
src/lj_ircall.h

@@ -113,6 +113,18 @@ typedef struct CCallInfo {
 #define IRCALLCOND_FFI32(x)		NULL
 #endif
 
+#if LJ_HASBUFFER
+#define IRCALLCOND_BUFFER(x)		x
+#else
+#define IRCALLCOND_BUFFER(x)		NULL
+#endif
+
+#if LJ_HASBUFFER && LJ_HASFFI
+#define IRCALLCOND_BUFFFI(x)		x
+#else
+#define IRCALLCOND_BUFFFI(x)		NULL
+#endif
+
 #if LJ_SOFTFP
 #define XA_FP		CCI_XA
 #define XA2_FP		(CCI_XA+CCI_XA)
@@ -163,6 +175,12 @@ typedef struct CCallInfo {
   _(ANY,	lj_buf_putstr_upper,	2,  FL, PGC, CCI_T) \
   _(ANY,	lj_buf_putstr_rep,	3,   L, PGC, CCI_T) \
   _(ANY,	lj_buf_puttab,		5,   L, PGC, CCI_T) \
+  _(BUFFER,	lj_bufx_set,		4,   S, NIL, 0) \
+  _(BUFFFI,	lj_bufx_more,		2,  FS, INT, CCI_T) \
+  _(BUFFER,	lj_serialize_put,	2,  FS, PGC, CCI_T) \
+  _(BUFFER,	lj_serialize_get,	2,  FS, PTR, CCI_T) \
+  _(BUFFER,	lj_serialize_encode,	2,  FA, STR, CCI_L|CCI_T) \
+  _(BUFFER,	lj_serialize_decode,	3,   A, INT, CCI_L|CCI_T) \
   _(ANY,	lj_buf_tostr,		1,  FL, STR, CCI_T) \
   _(ANY,	lj_tab_new_ah,		3,   A, TAB, CCI_L|CCI_T) \
   _(ANY,	lj_tab_new1,		2,  FA, TAB, CCI_L|CCI_T) \

+ 1 - 0
src/lj_iropt.h

@@ -124,6 +124,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
 LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
+LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim);
 LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
 
 /* Dead-store elimination. */

+ 46 - 11
src/lj_opt_fold.c

@@ -578,6 +578,9 @@ LJFOLDF(kfold_strcmp)
 ** The compromise is to declare them as loads, emit them like stores and
 ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
 ** fragments left over from CSE are eliminated by DCE.
+**
+** The string buffer methods emit a USE instead of a BUFSTR to keep the
+** chain alive.
 */
 
 LJFOLD(BUFHDR any any)
@@ -586,18 +589,38 @@ LJFOLDF(bufhdr_merge)
   return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
 }
 
-LJFOLD(BUFPUT BUFHDR BUFSTR)
-LJFOLDF(bufput_append)
+LJFOLD(BUFPUT any BUFSTR)
+LJFOLDF(bufput_bufstr)
 {
-  /* New buffer, no other buffer op inbetween and same buffer? */
-  if ((J->flags & JIT_F_OPT_FWD) &&
-      fleft->op2 == IRBUFHDR_RESET &&
-      fleft->prev == fright->op2 &&
-      fleft->op1 == IR(fright->op2)->op1) {
-    IRRef ref = fins->op1;
-    IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
-    IR(ref)->op1 = fright->op1;
-    return ref;
+  if ((J->flags & JIT_F_OPT_FWD)) {
+    IRRef hdr = fright->op2;
+    /* New buffer, no other buffer op inbetween and same buffer? */
+    if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
+	fleft->prev == hdr &&
+	fleft->op1 == IR(hdr)->op1) {
+      IRRef ref = fins->op1;
+      IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
+      IR(ref)->op1 = fright->op1;
+      return ref;
+    }
+    /* Replay puts to global temporary buffer. */
+    if (IR(hdr)->op2 == IRBUFHDR_RESET) {
+      IRIns *ir = IR(fright->op1);
+      /* For now only handle single string.reverse .lower .upper .rep. */
+      if (ir->o == IR_CALLL &&
+	  ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
+	  ir->op2 <= IRCALL_lj_buf_putstr_rep) {
+	IRIns *carg1 = IR(ir->op1);
+	if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
+	  IRIns *carg2 = IR(carg1->op1);
+	  if (carg2->op1 == hdr) {
+	    return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
+	  }
+	} else if (carg1->op1 == hdr) {
+	  return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
+	}
+      }
+    }
   }
   return EMITFOLD;  /* Always emit, CSE later. */
 }
@@ -2285,6 +2308,18 @@ LJFOLDF(fload_str_len_tostr)
   return NEXTFOLD;
 }
 
+LJFOLD(FLOAD any IRFL_SBUF_W)
+LJFOLD(FLOAD any IRFL_SBUF_E)
+LJFOLD(FLOAD any IRFL_SBUF_B)
+LJFOLD(FLOAD any IRFL_SBUF_L)
+LJFOLD(FLOAD any IRFL_SBUF_REF)
+LJFOLD(FLOAD any IRFL_SBUF_R)
+LJFOLDF(fload_sbuf)
+{
+  TRef tr = lj_opt_fwd_fload(J);
+  return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
+}
+
 /* The C type ID of cdata objects is immutable. */
 LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
 LJFOLDF(fload_cdata_typeid_kgc)

+ 26 - 2
src/lj_opt_mem.c

@@ -620,8 +620,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
 	goto doemit;
       break;  /* Otherwise continue searching. */
     case ALIAS_MUST:
-      if (store->op2 == val)  /* Same value: drop the new store. */
-	return DROPFOLD;
+      if (store->op2 == val &&
+	  !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
+	return DROPFOLD;  /* Same value: drop the new store. */
       /* Different value: try to eliminate the redundant store. */
       if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
 	IRIns *ir;
@@ -642,6 +643,29 @@ doemit:
   return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
 }
 
+/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */
+int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
+{
+  IRRef ref;
+  if (J->chain[IR_BUFPUT] > lim)
+    return 0;  /* Conflict. */
+  ref = J->chain[IR_CALLS];
+  while (ref > lim) {
+    IRIns *ir = IR(ref);
+    if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+      return 0;  /* Conflict. */
+    ref = ir->prev;
+  }
+  ref = J->chain[IR_CALLL];
+  while (ref > lim) {
+    IRIns *ir = IR(ref);
+    if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+      return 0;  /* Conflict. */
+    ref = ir->prev;
+  }
+  return 1;  /* No conflict. Can safely FOLD/CSE. */
+}
+
 /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
 
 /* Find cdata allocation for a reference (if any). */

+ 10 - 0
src/lj_record.c

@@ -1445,6 +1445,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
 	return 0;  /* No result yet. */
       }
     }
+#if LJ_HASBUFFER
+    /* The index table of buffer objects is treated as immutable. */
+    if (ix->mt == TREF_NIL && !ix->val &&
+	tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER &&
+	tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) {
+      cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv));
+      TRef tr = lj_record_constify(J, val);
+      if (tr) return tr;  /* Specialize to the value, i.e. a method. */
+    }
+#endif
     /* Otherwise retry lookup with metaobject. */
     ix->tab = ix->mobj;
     copyTV(J->L, &ix->tabv, &ix->mobjv);

+ 62 - 3
src/lj_serialize.c

@@ -18,6 +18,9 @@
 #include "lj_ctype.h"
 #include "lj_cdata.h"
 #endif
+#if LJ_HASJIT
+#include "lj_ir.h"
+#endif
 #include "lj_serialize.h"
 
 /* Tags for internal serialization format. */
@@ -400,6 +403,9 @@ eob:
   return NULL;
 }
 
+/* -- External serialization API ------------------------------------------ */
+
+/* Encode to buffer. */
 SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
 {
   sbx->depth = LJ_SERIALIZE_DEPTH;
@@ -407,10 +413,63 @@ SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
   return sbx;
 }
 
-SBufExt * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
+/* Decode from buffer. */
+char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
 {
-  sbx->r = serialize_get(sbx->r, sbx, o);
-  return sbx;
+  return serialize_get(sbx->r, sbx, o);
+}
+
+/* Stand-alone encoding, borrowing from global temporary buffer. */
+GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
+{
+  SBufExt sbx;
+  char *w;
+  memset(&sbx, 0, sizeof(SBufExt));
+  lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
+  sbx.depth = LJ_SERIALIZE_DEPTH;
+  w = serialize_put(sbx.w, &sbx, o);
+  return lj_str_new(L, sbx.b, (size_t)(w - sbx.b));
+}
+
+/* Stand-alone decoding, copy-on-write from string. */
+void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
+{
+  SBufExt sbx;
+  char *r;
+  memset(&sbx, 0, sizeof(SBufExt));
+  lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
+  /* No need to set sbx.cowref here. */
+  r = lj_serialize_get(&sbx, o);
+  if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);
+}
+
+#if LJ_HASJIT
+/* Peek into buffer to find the result IRType for specialization purposes. */
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
+{
+  uint32_t tp;
+  if (serialize_ru124(sbx->r, sbx->w, &tp)) {
+    /* This must match the handling of all tags in the decoder above. */
+    switch (tp) {
+    case SER_TAG_NIL: return IRT_NIL;
+    case SER_TAG_FALSE: return IRT_FALSE;
+    case SER_TAG_TRUE: return IRT_TRUE;
+    case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64:
+      return IRT_LIGHTUD;
+    case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM;
+    case SER_TAG_NUM: return IRT_NUM;
+    case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
+    case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
+      return IRT_TAB;
+    case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
+      return IRT_CDATA;
+    case SER_TAG_DICT:
+    default:
+      return IRT_STR;
+    }
+  }
+  return IRT_NIL;  /* Will fail on actual decode. */
 }
+#endif
 
 #endif

+ 6 - 1
src/lj_serialize.h

@@ -15,7 +15,12 @@
 
 LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict);
 LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
-LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
+LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
+LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);
+LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str);
+#if LJ_HASJIT
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx);
+#endif
 
 #endif